# Pandas

### Series

In [2]:
import pandas as pd

In [3]:
groceries = pd.Series(data = [30, 6, 'Yes', 'No'], index = ['eggs', 'apples', 'milk', 'bread'])

# We display the Groceries Pandas Series
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [4]:
groceries['milk']


'Yes'

In [213]:
type(groceries)

pandas.core.series.Series

In [214]:
groceries.dtype

dtype('O')

In [89]:
# We print some information about Groceries
print('Groceries has shape:', groceries.shape)
print('Groceries has dimension:', groceries.ndim)
print('Groceries has a total of', groceries.size, 'elements')

Groceries has shape: (4,)
Groceries has dimension: 1
Groceries has a total of 4 elements


In [6]:
groceries['eggs']

30

In [14]:
groceries[[0,2]]

eggs     30
milk    Yes
dtype: object

In [220]:
groceries[['eggs','apples']]

eggs      30
apples     6
dtype: object

# .loc / iloc

In [7]:
a=pd.Series(['IND','US','UK','IRAN'],index=[1,2,3,4])

In [8]:
a

1     IND
2      US
3      UK
4    IRAN
dtype: object

In [12]:
a.iloc[2]

'UK'

In [14]:
a[2]

'US'

In [13]:
a.loc[3]

'UK'

In [101]:
groceries.loc[['eggs','apples']]

eggs      30
apples     6
dtype: object

In [99]:
groceries.iloc[[2,3]]

milk     Yes
bread     No
dtype: object

In [222]:
groceries.drop('bread')

eggs       30
apples      6
milk      Yes
dtype: object

In [221]:
groceries.isnull()

eggs      False
apples    False
milk      False
bread     False
dtype: bool

# Pandas - Creating dataframes from dictionaries 

In [21]:
data = {"name": ["Sophia", "John", "Jennifer", "Edgar", "Patrick"],
       "gender": ["F", "M", "F", "M", "M"],
       "country": ["Bulgaria", "USA", "USA", "England", "Netherlands"],
       "age": [23, 24, 46, 13, 72]}

In [22]:
data

{'age': [23, 24, 46, 13, 72],
 'country': ['Bulgaria', 'USA', 'USA', 'England', 'Netherlands'],
 'gender': ['F', 'M', 'F', 'M', 'M'],
 'name': ['Sophia', 'John', 'Jennifer', 'Edgar', 'Patrick']}

In [23]:
sample_df = pd.DataFrame(data)
sample_df

Unnamed: 0,age,country,gender,name
0,23,Bulgaria,F,Sophia
1,24,USA,M,John
2,46,USA,F,Jennifer
3,13,England,M,Edgar
4,72,Netherlands,M,Patrick


## Reading CSV file in pandas directly

_** Loading the data and performing analytical operations as in SQL/SAS**_

In [24]:
df=pd.read_csv('Customer_churn_prediction.csv')

In [18]:
#sas_dataset

In [25]:
type(df)

pandas.core.frame.DataFrame

In [26]:
# displays all the columns in df

df.columns

Index(['Loan_ID', 'Gender', 'Married', 'Dependents', 'Education',
       'Self_Employed', 'ApplicantIncome', 'CoapplicantIncome', 'LoanAmount',
       'Loan_Amount_Term', 'Credit_History', 'Property_Area', 'Loan_Status'],
      dtype='object')

In [65]:
# displays no.of rows and columns
df.shape

(614, 13)

In [27]:
# Displays top 5 records

df.head(10)

# Try df.tail() also

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
6,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
7,LP001014,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N
8,LP001018,Male,Yes,2,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y
9,LP001020,Male,Yes,1,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N


In [28]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 614 entries, 0 to 613
Data columns (total 13 columns):
Loan_ID              614 non-null object
Gender               601 non-null object
Married              611 non-null object
Dependents           599 non-null object
Education            614 non-null object
Self_Employed        582 non-null object
ApplicantIncome      614 non-null int64
CoapplicantIncome    614 non-null float64
LoanAmount           592 non-null float64
Loan_Amount_Term     600 non-null float64
Credit_History       564 non-null float64
Property_Area        614 non-null object
Loan_Status          614 non-null object
dtypes: float64(4), int64(1), object(8)
memory usage: 62.4+ KB


In [31]:
df[['LoanAmount']].describe()

Unnamed: 0,LoanAmount
count,592.0
mean,146.412162
std,85.587325
min,9.0
25%,100.0
50%,128.0
75%,168.0
max,700.0


In [44]:
df.describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,614.0,614.0,592.0,600.0,564.0
mean,5403.459283,1621.245798,146.412162,342.0,0.842199
std,6109.041673,2926.248369,85.587325,65.12041,0.364878
min,150.0,0.0,9.0,12.0,0.0
25%,2877.5,0.0,100.0,360.0,1.0
50%,3812.5,1188.5,128.0,360.0,1.0
75%,5795.0,2297.25,168.0,360.0,1.0
max,81000.0,41667.0,700.0,480.0,1.0


In [37]:
df.describe()[['LoanAmount']].loc['mean']

LoanAmount    146.412162
Name: mean, dtype: float64

In [25]:
#df.describe()[['LoanAmount']].loc['count']

In [26]:
#df.describe()[['LoanAmount']].iloc[0]

In [38]:
# Total nuber of Null records

df.isnull().sum().sum()

149

In [48]:
df[['Married']].head()

Unnamed: 0,Married
0,No
1,Yes
2,Yes
3,Yes
4,No


In [45]:
df[['Married']].duplicated()

0      False
1      False
2       True
3       True
4       True
5       True
6       True
7       True
8       True
9       True
10      True
11      True
12      True
13      True
14      True
15      True
16      True
17      True
18      True
19      True
20      True
21      True
22      True
23      True
24      True
25      True
26      True
27      True
28      True
29      True
       ...  
584     True
585     True
586     True
587     True
588     True
589     True
590     True
591     True
592     True
593     True
594     True
595     True
596     True
597     True
598     True
599     True
600     True
601     True
602     True
603     True
604     True
605     True
606     True
607     True
608     True
609     True
610     True
611     True
612     True
613     True
Length: 614, dtype: bool

## Break this code and run to see whats happening

In [56]:
# Total nuber of Null records

df.isnull() #.sum().sum()
#df.isnull().sum() #.sum()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,False,False,False,False,False,False,False,False,True,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False
5,False,False,False,False,False,False,False,False,False,False,False,False,False
6,False,False,False,False,False,False,False,False,False,False,False,False,False
7,False,False,False,False,False,False,False,False,False,False,False,False,False
8,False,False,False,False,False,False,False,False,False,False,False,False,False
9,False,False,False,False,False,False,False,False,False,False,False,False,False


## Fetching records where Married column is Null

In [51]:
df[df.Married.isnull()]

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
104,LP001357,Male,,,Graduate,No,3816,754.0,160.0,360.0,1.0,Urban,Y
228,LP001760,Male,,,Graduate,No,4758,0.0,158.0,480.0,1.0,Semiurban,Y
435,LP002393,Female,,,Graduate,No,10047,0.0,,240.0,1.0,Semiurban,Y


In [58]:
df.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


### If the row index needs to be changed 

In [53]:
df.set_index('Loan_ID',inplace=True)


In [57]:
df.reset_index(drop=True, inplace=True)


## Accessing single column in a dataframe

In [44]:
df[['Gender']].head()

Unnamed: 0,Gender
0,Male
1,Male
2,Male
3,Male
4,Male


## Accessing Multiple columns in a dataframe

In [47]:
df[['Gender', 'Married','Loan_Status']].head()

Unnamed: 0,Gender,Married,Loan_Status
0,Male,No,Y
1,Male,Yes,N
2,Male,Yes,Y
3,Male,Yes,Y
4,Male,No,Y


## Accessing Rows in a dataframe using row index

In [263]:
df.iloc[[0]]

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y


In [144]:
df.loc[[3, 7]]

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
7,LP001014,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N


In [145]:
df.loc[3:5]

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y


In [68]:
#df.loc[[0]]

## Multiple ways to access records in a dataframe using row index and column labels

In [257]:
df[2:4][['Gender']]

Unnamed: 0,Gender
2,Male
3,Male


In [260]:
df.iloc[2:6][['LoanAmount']]

Unnamed: 0,LoanAmount
2,66.0
3,120.0
4,141.0
5,267.0


In [58]:
df[2:4][['Gender']]

Unnamed: 0,Gender
2,Male
3,Male


In [137]:
# Accessing individual elements in a df
#dataframe[column][row]. 

# df['Gender'][0]


In [None]:
# dataframe.insert(loc,label,data)

# df.drop(['Gender'], axis = 1) - Dropping the column Gender 

In [156]:
df2 = df.set_index('Loan_ID', inplace=False)
df2.head()

Unnamed: 0_level_0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
Loan_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y


In [166]:
df2.reset_index()

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
0,LP001002,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
4,LP001008,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
6,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
7,LP001014,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N
8,LP001018,Male,Yes,2,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y
9,LP001020,Male,Yes,1,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N


## Add new column in the dataframe

In [77]:
df.head()

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,AppIncome_percent
0,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y,5.849
1,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N,4.583
2,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y,3.0
3,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y,2.583
4,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y,6.0


In [89]:
df.Gender.value_counts()


Male      489
Female    112
Name: Gender, dtype: int64

In [94]:
#df(df['Married','Gender']).value_counts()

In [76]:
df['AppIncome_percent'] = df['ApplicantIncome'] / 1000

In [80]:
df[df['Gender'] == 'Male']
#= df['ApplicantIncome'] / 1000

Unnamed: 0,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,AppIncome_percent
0,Male,No,0,Graduate,No,5849,0.0,,360.0,1.0,Urban,Y,5.849
1,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N,4.583
2,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y,3.000
3,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y,2.583
4,Male,No,0,Graduate,No,6000,0.0,141.0,360.0,1.0,Urban,Y,6.000
5,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y,5.417
6,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y,2.333
7,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N,3.036
8,Male,Yes,2,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y,4.006
9,Male,Yes,1,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N,12.841


## Count unique values in a column

In [281]:
df['Gender'].value_counts()

Male      489
Female    112
Name: Gender, dtype: int64

In [292]:
#df[df['Gender'] == 'Male'][['Married']].value_counts()

In [103]:

a=df[(df['Gender'] == 'Male') & (df['Married'] == 'Yes')].describe()[['LoanAmount']]
#loc['mean']

In [107]:
a

Unnamed: 0,LoanAmount
count,343.0
mean,154.011662
std,83.025254
min,17.0
25%,108.0
50%,132.0
75%,180.0
max,600.0


In [108]:
a.loc[['mean','std']]

Unnamed: 0,LoanAmount
mean,154.011662
std,83.025254


In [70]:

df[(df['Gender'] == 'Male') & (df['Married'] == 'Yes')].describe()

Unnamed: 0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
count,357.0,357.0,343.0,348.0,326.0
mean,5529.540616,1828.330308,154.011662,335.931034,0.846626
std,6743.209021,2096.367198,83.025254,67.342095,0.360902
min,150.0,0.0,17.0,12.0,0.0
25%,2882.0,0.0,108.0,360.0,1.0
50%,3875.0,1619.0,132.0,360.0,1.0
75%,5829.0,2500.0,180.0,360.0,1.0
max,81000.0,20000.0,600.0,480.0,1.0


In [None]:
pd.read_sql_query()

In [182]:
# Records only with the married status is Yes
df[df['Married'] == 'Yes']

Unnamed: 0,Loan_ID,Gender,Married,Dependents,Education,Self_Employed,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status
1,LP001003,Male,Yes,1,Graduate,No,4583,1508.0,128.0,360.0,1.0,Rural,N
2,LP001005,Male,Yes,0,Graduate,Yes,3000,0.0,66.0,360.0,1.0,Urban,Y
3,LP001006,Male,Yes,0,Not Graduate,No,2583,2358.0,120.0,360.0,1.0,Urban,Y
5,LP001011,Male,Yes,2,Graduate,Yes,5417,4196.0,267.0,360.0,1.0,Urban,Y
6,LP001013,Male,Yes,0,Not Graduate,No,2333,1516.0,95.0,360.0,1.0,Urban,Y
7,LP001014,Male,Yes,3+,Graduate,No,3036,2504.0,158.0,360.0,0.0,Semiurban,N
8,LP001018,Male,Yes,2,Graduate,No,4006,1526.0,168.0,360.0,1.0,Urban,Y
9,LP001020,Male,Yes,1,Graduate,No,12841,10968.0,349.0,360.0,1.0,Semiurban,N
10,LP001024,Male,Yes,2,Graduate,No,3200,700.0,70.0,360.0,1.0,Urban,Y
11,LP001027,Male,Yes,2,Graduate,,2500,1840.0,109.0,360.0,1.0,Urban,Y


# groupby

In [190]:
df['Gender'].value_counts()

Male      489
Female    112
Name: Gender, dtype: int64

In [117]:
df.groupby('Gender').describe()['AppIncome_percent'][['std']]

Unnamed: 0_level_0,std
Gender,Unnamed: 1_level_1
Female,3.585381
Male,6.185789


In [119]:
df.groupby('Gender').max()

Unnamed: 0_level_0,Education,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History,Property_Area,Loan_Status,AppIncome_percent
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Female,Not Graduate,19484,41667.0,600.0,480.0,1.0,Urban,Y,19.484
Male,Not Graduate,81000,33837.0,650.0,480.0,1.0,Urban,Y,81.0


In [315]:
df.groupby(['Gender']).sum()

Unnamed: 0_level_0,ApplicantIncome,CoapplicantIncome,LoanAmount,Loan_Amount_Term,Credit_History
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Female,520069,124097.0,13810.0,38400.0,84.0
Male,2663319,852293.919989,70155.0,162360.0,382.0


In [318]:
df.groupby(['Gender']).describe()

Unnamed: 0_level_0,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,CoapplicantIncome,CoapplicantIncome,...,LoanAmount,LoanAmount,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Female,112.0,4643.473214,3585.381488,210.0,2661.5,3583.0,4606.5,19484.0,112.0,1108.008929,...,138.0,600.0,109.0,352.293578,56.722081,36.0,360.0,360.0,360.0,480.0
Male,489.0,5446.460123,6185.789262,150.0,2917.0,3865.0,5923.0,81000.0,489.0,1742.932352,...,175.0,650.0,478.0,339.665272,67.08914,12.0,360.0,360.0,360.0,480.0


In [121]:
pd.read_csv('https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv',sep='\t')

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
6,3,1,Side of Chips,,$1.69
7,4,1,Steak Burrito,"[Tomatillo Red Chili Salsa, [Fajita Vegetables...",$11.75
8,4,1,Steak Soft Tacos,"[Tomatillo Green Chili Salsa, [Pinto Beans, Ch...",$9.25
9,5,1,Steak Burrito,"[Fresh Tomato Salsa, [Rice, Black Beans, Pinto...",$9.25


In [321]:
df.groupby('Gender').describe()['LoanAmount'][['mean']]

Unnamed: 0_level_0,mean
Gender,Unnamed: 1_level_1
Female,126.697248
Male,149.265957


In [197]:
df.groupby('Gender')

<pandas.core.groupby.DataFrameGroupBy object at 0x000000C1C0165A20>

In [323]:
df.groupby(['Gender','Married']).describe()

Unnamed: 0_level_0,Unnamed: 1_level_0,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,ApplicantIncome,CoapplicantIncome,CoapplicantIncome,...,LoanAmount,LoanAmount,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term,Loan_Amount_Term
Unnamed: 0_level_1,Unnamed: 1_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Gender,Married,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2
Female,No,80.0,4503.7875,3333.16079,416.0,2706.75,3525.5,4588.75,18165.0,80.0,1020.0125,...,135.0,350.0,77.0,355.012987,53.1459,36.0,360.0,360.0,360.0,480.0
Female,Yes,31.0,4829.645161,4152.831039,210.0,2564.5,3625.0,4595.5,19484.0,31.0,1370.83871,...,142.0,600.0,31.0,349.16129,63.069854,84.0,360.0,360.0,360.0,480.0
Male,No,130.0,5236.146154,4379.170994,1442.0,2973.25,3861.5,5985.25,37719.0,130.0,1529.430769,...,155.0,650.0,128.0,348.5625,65.100256,36.0,360.0,360.0,360.0,480.0
Male,Yes,357.0,5529.540616,6743.209021,150.0,2882.0,3875.0,5829.0,81000.0,357.0,1828.330308,...,180.0,600.0,348.0,335.931034,67.342095,12.0,360.0,360.0,360.0,480.0


In [71]:
df['AppIncome_percent'] = df['ApplicantIncome'] / 100