# DataFrame

The DataFrame is probably the most important and most commonly used object in pandas. A DataFrame is essentially a collection of Series that share a common index; it arranges data in rows and columns in a tabular structure. For an in-depth study of DataFrames, see [this tutorial](https://www.datacamp.com/community/tutorials/pandas-tutorial-dataframe-python) from DataCamp.

In [1]:
import pandas as pd

In [2]:
# Read the height/weight records from a CSV file; the path is relative to the
# notebook's working directory.
height_weight_df = pd.read_csv(filepath_or_buffer='height_weight.csv')

In [3]:
# Preview only the first three rows instead of dumping the whole frame.
height_weight_df.head(3)

Unnamed: 0,Person,Height,Weight
0,A,72,186
1,B,69,205
2,C,70,201


In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns Height and Weight; the text column Person is left out.
height_weight_df.describe()

Unnamed: 0,Height,Weight
count,5.0,5.0
mean,66.0,161.2
std,6.284903,51.577127
min,57.0,89.0
25%,62.0,125.0
50%,69.0,186.0
75%,70.0,201.0
max,72.0,205.0


In [5]:
# Print a structural overview: index range, per-column non-null counts and
# dtypes, and approximate memory usage.
height_weight_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 3 columns):
Person    5 non-null object
Height    5 non-null int64
Weight    5 non-null int64
dtypes: int64(2), object(1)
memory usage: 200.0+ bytes


In [6]:
# Preview the frame with Person as the row index. set_index returns a new
# frame here; height_weight_df itself is not modified by this cell.
height_weight_df.set_index('Person')

Unnamed: 0_level_0,Height,Weight
Person,Unnamed: 1_level_1,Unnamed: 2_level_1
A,72,186
B,69,205
C,70,201
D,62,125
E,57,89


In [7]:
# Make Person the row index, this time modifying height_weight_df in place
# (the call returns None, so the cell displays nothing).
# NOTE: this cell is not re-runnable. Once Person has moved into the index it
# is no longer a column, so running the cell a second time raises KeyError;
# re-run the read_csv cell first.
height_weight_df.set_index(['Person'],inplace=True)

In [8]:
# Display the frame to confirm that Person is now the row index.
height_weight_df

Unnamed: 0_level_0,Height,Weight
Person,Unnamed: 1_level_1,Unnamed: 2_level_1
A,72,186
B,69,205
C,70,201
D,62,125
E,57,89


In [9]:
# info() now reports a label index (A to E) and only the two numeric data
# columns, since Person is no longer a column.
height_weight_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 5 entries, A to E
Data columns (total 2 columns):
Height    5 non-null int64
Weight    5 non-null int64
dtypes: int64(2)
memory usage: 120.0+ bytes


In [10]:
# Bracket selection with a single column name returns that column as a Series
# that keeps the Person index.
height_weight_df['Height']

Person
A    72
B    69
C    70
D    62
E    57
Name: Height, dtype: int64

In [11]:
# Same selection for the Weight column.
height_weight_df['Weight']

Person
A    186
B    205
C    201
D    125
E     89
Name: Weight, dtype: int64

In [12]:
# Confirm that a single selected column is a pandas Series, not a DataFrame.
type(height_weight_df['Height'])

pandas.core.series.Series

In [13]:
# Two extra people, F and G. G has no recorded weight; the gap is written out
# as None so the row is not silently shorter than the column list, and pandas
# stores it as a missing value.
missing_df = pd.DataFrame(
    data=[
        [65, 121],
        [60, None],
    ],
    index=['F', 'G'],
    columns=['Height', 'Weight'],
)

In [14]:
# Display the new rows: G's Weight is missing, and the Weight column is shown
# as float (121.0).
missing_df

Unnamed: 0,Height,Weight
F,65,121.0
G,60,


In [15]:
# Stack the original rows (A-E) and the new rows (F, G) into one frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# pd.concat is used instead; with default arguments it concatenates row-wise
# and keeps each frame's index labels, which is what append did here.
updated_df = pd.concat([height_weight_df, missing_df])
updated_df

Unnamed: 0,Height,Weight
A,72,186.0
B,69,205.0
C,70,201.0
D,62,125.0
E,57,89.0
F,65,121.0
G,60,


In [16]:
# Column arithmetic is element-wise: add 5 to every height and subtract 5 from
# every weight. Missing values stay missing.
# NOTE: this cell changes updated_df each time it runs, so re-running it
# applies the adjustment again.
updated_df['Height'] = updated_df['Height'] + 5
updated_df['Weight'] = updated_df['Weight'] - 5

In [17]:
# Display the adjusted frame: heights are 5 higher, weights 5 lower, and G's
# Weight is still missing.
updated_df

Unnamed: 0,Height,Weight
A,77,181.0
B,74,200.0
C,75,196.0
D,67,120.0
E,62,84.0
F,70,116.0
G,65,


In [18]:
# Fill each missing value with the mean of its column. fillna returns a new
# frame, which is only displayed here; updated_df itself is left untouched.
updated_df.fillna(value=updated_df.mean())

Unnamed: 0,Height,Weight
A,77,181.0
B,74,200.0
C,75,196.0
D,67,120.0
E,62,84.0
F,70,116.0
G,65,149.5


In [19]:
# updated_df is unchanged: the fillna() call in the previous cell returned a
# new frame, so G's Weight is still missing here.
updated_df

Unnamed: 0,Height,Weight
A,77,181.0
B,74,200.0
C,75,196.0
D,67,120.0
E,62,84.0
F,70,116.0
G,65,


In [20]:
# Drop every row that contains a missing value, modifying updated_df in place
# (the call returns None, so nothing is displayed).
updated_df.dropna(axis='index', inplace=True)

In [21]:
# Row G (the one with the missing Weight) is gone; six complete rows remain.
updated_df

Unnamed: 0,Height,Weight
A,77,181.0
B,74,200.0
C,75,196.0
D,67,120.0
E,62,84.0
F,70,116.0


In [22]:
# Unit-conversion factors applied to the Height and Weight columns below.
inch_to_meter_ratio = 0.0254   # metres per inch
lbs_to_kg_ratio = 0.453592     # kilograms per pound, rounded to six decimals

In [23]:
# Scale both columns by their conversion factors (presumably inches -> metres
# and pounds -> kilograms, going by the ratio names).
# NOTE: this cell changes updated_df each time it runs, so re-running it
# applies the conversion a second time.
updated_df['Height'] = updated_df['Height'].mul(inch_to_meter_ratio)
updated_df['Weight'] = updated_df['Weight'].mul(lbs_to_kg_ratio)

In [24]:
# Display the frame after scaling; Height and Weight now hold the converted
# values.
updated_df

Unnamed: 0,Height,Weight
A,1.9558,82.100152
B,1.8796,90.7184
C,1.905,88.904032
D,1.7018,54.43104
E,1.5748,38.101728
F,1.778,52.616672


In [25]:
# Add a BMI column computed row by row as Weight divided by Height squared
# (the standard formula expects kilograms and metres).
updated_df['BMI'] = updated_df['Weight'].div(updated_df['Height'].pow(2))

In [26]:
# Display the frame with the new BMI column.
updated_df

Unnamed: 0,Height,Weight,BMI
A,1.9558,82.100152,21.46323
B,1.8796,90.7184,25.678196
C,1.905,88.904032,24.498049
D,1.7018,54.43104,18.794449
E,1.5748,38.101728,15.363631
F,1.778,52.616672,16.644083


In [28]:
# Save the result to BMI.csv. index_label='Person' names the index column in
# the file header, since the combined frame's index is displayed without a
# name.
updated_df.to_csv(path_or_buf='BMI.csv', index_label='Person')