# Pandas - DataFrame Basics

In [1]:
import pandas as pd

In [2]:
# Load the weather table from a CSV file; the relative path is resolved
# against the notebook's current working directory.

df = pd.read_csv("weather_data.csv")
df  # a bare last expression is rendered as the cell's output

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [3]:
# Build the same table in memory instead of reading it from disk: every key
# becomes a column name and every list holds that column's values, one per row.

weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)

df = pd.DataFrame(weather_data)  # columns follow the dict's insertion order
df  # display the full data frame


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [4]:
# shape is an attribute (no parentheses) holding a (rows, columns) tuple

df.shape 

(6, 4)

In [5]:
# Unpack the shape tuple to keep the row and column counts in separate variables

rows, columns = df.shape
rows


6

In [6]:
df.head  # NOTE: without parentheses this shows the bound method itself, not the rows; call df.head() to get the first rows



<bound method NDFrame.head of         day  temperature  windspeed  event
0  1/1/2017           32          6   Rain
1  1/2/2017           35          7  Sunny
2  1/3/2017           28          2   Snow
3  1/4/2017           24          7   Snow
4  1/5/2017           32          4   Rain
5  1/6/2017           31          2  Sunny>

In [8]:
df.head()  # with no argument, head() returns the first 5 rows


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [9]:
df.head(2) # print first 2 only


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [10]:
df.tail()  # with no argument, tail() returns the last 5 rows


Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [11]:
df.tail(2) # print last 2 only 

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [12]:
df


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [13]:
# Slicing rows by position: the start is inclusive and the stop is exclusive

df[2:5]  # rows at positions 2, 3 and 4

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [14]:
df[:] # an open-ended slice selects every row; the displayed result is the same as plain df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [17]:
df.columns # to print columns


Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [18]:
df.day 

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [19]:
df.event


0     Rain
1    Sunny
2     Snow
3     Snow
4     Rain
5    Sunny
Name: event, dtype: object

In [20]:
df['event']

0     Rain
1    Sunny
2     Snow
3     Snow
4     Rain
5    Sunny
Name: event, dtype: object

In [21]:
type(df['event'])  # selecting a single column yields a Series, not a DataFrame

pandas.core.series.Series

In [22]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [25]:
df[['event','day']] # pass a list of names to select several columns; the result is a DataFrame with the columns in the listed order

Unnamed: 0,event,day
0,Rain,1/1/2017
1,Sunny,1/2/2017
2,Snow,1/3/2017
3,Snow,1/4/2017
4,Rain,1/5/2017
5,Sunny,1/6/2017


# DataFrame Basic Operations

In [26]:
df


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [27]:
df['temperature']

0    32
1    35
2    28
3    24
4    32
5    31
Name: temperature, dtype: int64

In [31]:
df['temperature'] # selects the whole temperature column; call .max() on it to get the maximum


0    32
1    35
2    28
3    24
4    32
5    31
Name: temperature, dtype: int64

In [32]:
df['temperature'].max() # max value


35

In [33]:
df['temperature'].min() # min value


24

In [34]:
df['temperature'].mean() # average value


30.333333333333332

In [35]:
df['temperature'].std() # standard deviation (sample standard deviation, i.e. divides by n - 1)


3.8297084310253524

In [38]:
df.describe()  # data statistics


Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [39]:
df[df.temperature>=32]  # boolean mask: keep only the rows where temperature is at least 32

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Rain


In [42]:
df[df.temperature==df['temperature'].max()] # row(s) whose temperature equals the column maximum

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [43]:
# Day(s) on which the temperature reached its maximum. A single .loc call
# takes the row mask and the column label together, replacing the chained
# df['day'][mask] lookup while returning the same 'day' Series.
df.loc[df['temperature'] == df['temperature'].max(), 'day']

1    1/2/2017
Name: day, dtype: object

In [47]:
# Day and temperature for the hottest row(s). One .loc call selects rows by
# mask and columns by list, instead of first copying two columns and then
# filtering the copy (chained indexing).
df.loc[df['temperature'] == df['temperature'].max(), ['day', 'temperature']]

Unnamed: 0,day,temperature
1,1/2/2017,35


# Indexing

In [48]:
df


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [49]:
df.index

RangeIndex(start=0, stop=6, step=1)

In [51]:
df.set_index('day', inplace=True) # inplace=True modifies df itself and returns None; without it, set_index returns a new DataFrame and leaves df unchanged

In [53]:
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [54]:
# .loc looks rows up by index label; with 'day' as the index, a date string selects that day's row


df.loc['1/2/2017']

temperature       35
windspeed          7
event          Sunny
Name: 1/2/2017, dtype: object

In [55]:
# Restore the default integer index; the old index ('day') becomes a regular column again

df.reset_index(inplace=True)

In [56]:
df


Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [61]:
# Use the temperature column as the index; index labels need not be unique (32 appears twice).
# NOTE(review): the saved output below has no 'event' column and the execution
# counter jumps from 56 to 61, so a cell that changed df was probably run and
# then deleted - confirm with Restart Kernel -> Run All.
# NOTE(review): because of inplace=True this cell cannot be re-run as-is: on a
# second run 'temperature' is already the index, not a column, so set_index raises KeyError.
df.set_index('temperature',inplace=True)
df

Unnamed: 0_level_0,day,windspeed
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1
32,1/1/2017,6
35,1/2/2017,7
28,1/3/2017,2
24,1/4/2017,7
32,1/5/2017,4
31,1/6/2017,2


In [63]:
df.loc[32]  # the label 32 occurs twice in the index, so .loc returns every matching row as a DataFrame

Unnamed: 0_level_0,day,windspeed
temperature,Unnamed: 1_level_1,Unnamed: 2_level_1
32,1/1/2017,6
32,1/5/2017,4
