In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 

In [2]:
# Read the NYC weather data from a CSV file (relative path) into `df`.
df = pd.read_csv(filepath_or_buffer="nyc_weather.csv")

In [3]:
# Preview the first rows of the loaded frame (at most 15).
df.head(n=15)

Unnamed: 0,EST,Temperature,DewPoint,Humidity,Sea Level PressureIn,VisibilityMiles,WindSpeedMPH,PrecipitationIn,CloudCover,Events,WindDirDegrees
0,1/1/2016,38,23,52,30.03,10,8.0,0,5,,281
1,1/2/2016,36,18,46,30.02,10,7.0,0,3,,275
2,1/3/2016,40,21,47,29.86,10,8.0,0,1,,277
3,1/4/2016,25,9,44,30.05,10,9.0,0,3,,345
4,1/5/2016,20,-3,41,30.57,10,5.0,0,0,,333
5,1/6/2016,33,4,35,30.5,10,4.0,0,0,,259
6,1/7/2016,39,11,33,30.28,10,2.0,0,3,,293
7,1/8/2016,39,29,64,30.2,10,4.0,0,8,,79
8,1/9/2016,44,38,77,30.16,9,8.0,T,8,Rain,76
9,1/10/2016,50,46,71,29.59,4,,1.8,7,Rain,109


#### DataFrame

In [4]:
# Build a small demo DataFrame from a dict: each key becomes a column name and
# each list holds that column's values (all lists have the same length).
# NOTE: this rebinds `df`, replacing the frame read from 'nyc_weather.csv'.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)

df = pd.DataFrame(data=weather_data)

In [5]:
# Display the full frame (only 6 rows, so showing everything is fine here).
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [6]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape


(6, 4)

### Row Operations

In [7]:
# First n rows of the frame (here n=2).
df.head(n=2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [9]:
# Last n rows of the frame (here n=2).
df.tail(n=2)

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [11]:
# Draw n=2 rows at random; the selection changes on every run unless a
# `random_state` is passed to sample().
df.sample(n=2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
5,1/6/2017,31,2,Sunny


In [14]:
# Select rows by position: start is inclusive, stop is exclusive, so this
# returns the rows at positions 1 and 2. `.iloc` makes the positional intent
# explicit instead of relying on how `df[...]` interprets a slice.
df.iloc[1:3]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


### Column Operations

In [18]:
# Column labels of the frame (returned as a pandas Index, not a plain list).
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [21]:
# Select a single column.
# Method 1: attribute access. This only works when the column name is a valid
# Python identifier that does not clash with an existing DataFrame attribute.
df.day

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [22]:
# Select a single column.
# Method 2: bracket access with the column label; works for any column name.
df['day']

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [27]:
# Python type of the object returned when selecting one column: a pandas Series.
type(df['day'])

pandas.core.series.Series

In [31]:
# Select several columns at once by passing a list of labels; the result is a
# DataFrame containing only those columns.

df[['day','event']]

Unnamed: 0,day,event
0,1/1/2017,Rain
1,1/2/2017,Sunny
2,1/3/2017,Snow
3,1/4/2017,Snow
4,1/5/2017,Rain
5,1/6/2017,Sunny


### Operations on a DataFrame


Reference: [pandas.Series API documentation](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html)

In [34]:
# Highest value in the temperature column.
df['temperature'].max()

35

In [39]:
# Rows where the temperature is strictly greater than 32 (boolean-mask filter).
df.loc[df['temperature'] > 32]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [40]:
# Day(s) on which the temperature reached its maximum.
# Row mask and column label are passed to a single `.loc` call rather than
# chaining `df['day'][mask]`; if several days tie for the maximum, all are returned.
df.loc[df['temperature'] == df['temperature'].max(), 'day']

1    1/2/2017
Name: day, dtype: object

In [41]:
# Full row(s) whose temperature equals the column maximum.
df.loc[df['temperature'] == df['temperature'].max()]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [42]:
# Sample standard deviation of the temperature column (ddof=1 is the pandas default).
df['temperature'].std(ddof=1)

3.8297084310253524

In [43]:
# Summary statistics (count, mean, std, min, quartiles, max); with default
# arguments only the numeric columns are summarised.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


### Setting Index

In [44]:
# Use the 'day' column as the row index. This returns a new frame for display;
# `df` itself is left unchanged because the result is not assigned.
df.set_index(keys='day')

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [45]:
# Make 'day' the index for the following cells by rebinding `df` to the
# re-indexed frame (preferred over `inplace=True`).
# Re-running this cell without resetting the index raises KeyError, because
# 'day' is then no longer a column.
df = df.set_index('day')

In [46]:
# Show the frame again: 'day' is now the index instead of a regular column.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [47]:
# Look up a row by its index label; a single match is returned as a Series.
df.loc['1/2/2017']

temperature       35
windspeed          7
event          Sunny
Name: 1/2/2017, dtype: object

In [48]:
# Reset the index: 'day' moves back into a regular column and the default
# integer index is restored. Assign the result instead of using `inplace=True`.
df = df.reset_index()
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [49]:
# Index the frame by 'event' so rows can be looked up by event name.
# Unlike a hash map, the index is not unique here: each event appears twice,
# so a label lookup returns every matching row.
df = df.set_index('event')
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [50]:
# Label lookup on the non-unique 'event' index: returns all rows labelled 'Snow'.
df.loc['Snow']

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
