# DataFrame Basics Tutorial

#### A DataFrame is the most commonly used object in pandas. It is a table-like data structure containing rows and columns, similar to an Excel spreadsheet.

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Raw weather observations: every key holds one list with a value per day.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny'],
)

In [3]:
# Confirm that the raw weather records are held in a plain Python dict.
type(weather_data)

dict

In [4]:
# Display the raw dictionary: each key maps to a list of six values.
weather_data

{'day': ['1/1/2017',
  '1/2/2017',
  '1/3/2017',
  '1/4/2017',
  '1/5/2017',
  '1/6/2017'],
 'temperature': [32, 35, 28, 24, 32, 31],
 'windspeed': [6, 7, 2, 7, 4, 2],
 'event': ['Rain', 'Sunny', 'Snow', 'Snow', 'Sunny', 'Sunny']}

In [None]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values.
df = pd.DataFrame(data=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [6]:
# Verify that the object built from the dict is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [7]:
# shape is a (rows, columns) tuple; here six daily records with four fields.
df.shape

(6, 4)

In [10]:
# Select the rows at positions 2, 3 and 4 (the stop position 5 is excluded).
# .iloc makes the positional intent explicit instead of relying on the
# special-case row slicing of plain df[...].
newdf = df.iloc[2:5]
newdf

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny


In [None]:
# Positional selection: rows 2..4 and every column except the last one
# (a stop of -1 excludes the final column).
newdf = df.iloc[2:5, :-1]
newdf

Unnamed: 0,day,temperature,windspeed
2,1/3/2017,28,2
3,1/4/2017,24,7
4,1/5/2017,32,4


In [None]:
# Look up the index label(s) of the row(s) whose day equals 1/4/2017.
df.index[df["day"].eq("1/4/2017")]

Index([3], dtype='int64')

In [13]:
# Print the column labels as an Index object, then show the same labels as a
# plain Python list.
print(df.columns)
columns_name = df.columns.tolist()
columns_name

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')


['day', 'temperature', 'windspeed', 'event']

In [14]:
# Keep all rows but only the 'day' and 'event' columns.
twodf = df.loc[:, ['day', 'event']]
twodf

Unnamed: 0,day,event
0,1/1/2017,Rain
1,1/2/2017,Sunny
2,1/3/2017,Snow
3,1/4/2017,Snow
4,1/5/2017,Sunny
5,1/6/2017,Sunny


In [15]:
# Remove the name of the two-column selection; it is not referenced again in
# the cells shown.
del twodf

### Operations On DataFrame

In [16]:
# Print the mean and the standard deviation of the temperature column,
# one value per line.
print(df["temperature"].mean(), df["temperature"].std(), sep="\n")

30.333333333333332
3.8297084310253524


In [17]:
# Rows whose temperature is strictly greater than 30, selected with a boolean mask.
df.loc[df['temperature'].gt(30)]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [18]:
# Number of rows whose temperature is strictly greater than 30.
df.loc[df['temperature'].gt(30)].shape[0]

4

In [None]:
# Day(s) on which the minimum temperature was recorded.
# A single .loc call with a boolean row mask and a column label replaces the
# chained df['day'][mask] lookup.
df.loc[df['temperature'] == df['temperature'].min(), 'day']

3    1/4/2017
Name: day, dtype: object

In [None]:
# sort_values returns a new Series sorted from lowest to highest temperature;
# the result is stored in a separate variable and df itself is not reordered.
data = df["temperature"].sort_values(ascending=True)
data

3    24
2    28
5    31
0    32
4    32
1    35
Name: temperature, dtype: int64

In [23]:
# Same sort as a separate Series, this time from highest to lowest temperature;
# df itself is not reordered.
data = df["temperature"].sort_values(ascending=False)
data

1    35
0    32
4    32
5    31
2    28
3    24
Name: temperature, dtype: int64

In [24]:
# Mean temperature over all rows, shown as the cell's result rather than printed.
df['temperature'].mean()

np.float64(30.333333333333332)

In [25]:
# Mean temperature of the rows at positions 3, 4 and 5 (the stop 6 is excluded).
# .iloc states explicitly that the slice is positional, not label-based.
df['temperature'].iloc[3:6].mean()

np.float64(29.0)

In [26]:
# max() on a string column compares the values as text, so this returns the
# event that sorts last ('Sunny'), not the most frequent one.
df['event'].max()

'Sunny'

In [27]:
# How many times each event occurs, most frequent first.
df["event"].value_counts()

event
Sunny    3
Snow     2
Rain     1
Name: count, dtype: int64

In [None]:
# Highest frequency among the event categories (the count, not the label).
df["event"].value_counts().max()

np.int64(3)

In [29]:
# Label of the most frequent event: the index entry that holds the largest count.
df["event"].value_counts().idxmax()

'Sunny'

In [30]:
# mode() returns a Series because several values can tie for most frequent;
# here it holds the single value 'Sunny'.
df["event"].mode()

0    Sunny
Name: event, dtype: object

## Set Index

In [31]:
# Show the frame before changing its index; the rows carry the default
# integer labels 0..5.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [None]:
# The default index is a RangeIndex that yields the row labels 0 through 5.
df.index

RangeIndex(start=0, stop=6, step=1)

In [34]:
# Use the 'day' column as the row index. set_index returns a new frame, so the
# name is rebound instead of mutating the frame with inplace=True.
df = df.set_index('day')

In [35]:
# 'day' now labels the rows instead of being a regular column.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Sunny
1/6/2017,31,2,Sunny


In [None]:
# Three columns instead of four: 'day' became the index and is no longer
# counted as a column.
df.shape

(6, 3)

In [None]:
# 'day' is the index here, so .loc slices by label; unlike positional slicing,
# the end label "1/4/2017" is included in the result.
df.loc["1/1/2017" : "1/4/2017"]

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow


In [38]:
# Turn the 'day' index back into a regular column and restore the default
# integer index. reset_index returns a new frame, so the name is rebound
# instead of mutating the frame with inplace=True.
df = df.reset_index()

In [39]:
# 'day' is a regular column again and the rows are labelled 0..5.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [40]:
# Build a separate frame indexed by 'day' while leaving df untouched.
# set_index already returns a new frame, so no explicit copy() followed by an
# in-place call is needed.
newdf = df.set_index('day')

In [41]:
# The separate frame: its rows are labelled by 'day'.
newdf

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Sunny
1/6/2017,31,2,Sunny


In [42]:
# df itself is unchanged: only newdf was re-indexed.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Sunny
5,1/6/2017,31,2,Sunny


In [43]:
# Re-index newdf by 'event'. The previous 'day' index is replaced and therefore
# no longer appears in the result. The name is rebound to the returned frame
# rather than mutating it with inplace=True.
# NOTE: this cell consumes the 'event' column, so running it a second time
# without rebuilding newdf raises a KeyError.
newdf = newdf.set_index("event")
newdf

Unnamed: 0_level_0,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1
Rain,32,6
Sunny,35,7
Snow,28,2
Snow,24,7
Sunny,32,4
Sunny,31,2


In [44]:
# The 'event' index contains repeated labels, so .loc returns every row
# labelled "Sunny" rather than a single row.
newdf.loc["Sunny"]

Unnamed: 0_level_0,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1
Sunny,35,7
Sunny,32,4
Sunny,31,2


In [45]:
# Same label lookup for "Snow": both rows carrying that label are returned.
newdf.loc["Snow"]

Unnamed: 0_level_0,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1
Snow,28,2
Snow,24,7


In [None]:
# Add a 1-based running number for every row. Sizing the range from the frame
# keeps the assignment valid if the number of rows changes.
# numpy is imported once in the imports cell at the top of the notebook.
df["NewData"] = np.arange(1, len(df) + 1)
df

Unnamed: 0,day,temperature,windspeed,event,NewData
0,1/1/2017,32,6,Rain,1
1,1/2/2017,35,7,Sunny,2
2,1/3/2017,28,2,Snow,3
3,1/4/2017,24,7,Snow,4
4,1/5/2017,32,4,Sunny,5
5,1/6/2017,31,2,Sunny,6


In [49]:
# Flag the rows whose temperature is exactly 32. The element-wise comparison
# yields one boolean per row, replacing the manual loop and temporary list.
df['Check'] = df['temperature'] == 32

In [50]:
# Display the frame including the boolean 'Check' column.
df

Unnamed: 0,day,temperature,windspeed,event,NewData,Check
0,1/1/2017,32,6,Rain,1,True
1,1/2/2017,35,7,Sunny,2,False
2,1/3/2017,28,2,Snow,3,False
3,1/4/2017,24,7,Snow,4,False
4,1/5/2017,32,4,Sunny,5,True
5,1/6/2017,31,2,Sunny,6,False


In [51]:
# Attach a 'Names' column holding one name for each of the six rows.
df["Names"] = np.array(
    ["Ahmed", "Ali", "Gehad", "Emad", "Anas", "Amr"]
)
df

Unnamed: 0,day,temperature,windspeed,event,NewData,Check,Names
0,1/1/2017,32,6,Rain,1,True,Ahmed
1,1/2/2017,35,7,Sunny,2,False,Ali
2,1/3/2017,28,2,Snow,3,False,Gehad
3,1/4/2017,24,7,Snow,4,False,Emad
4,1/5/2017,32,4,Sunny,5,True,Anas
5,1/6/2017,31,2,Sunny,6,False,Amr
