In [162]:
# Notebook outline. This cell is a bare string literal, so Jupyter echoes it back as the cell output.
'''
Topics to be covered:
1. Creating dataframes
2. Dealing with rows and columns
3. Operations: min, max, std, describe
4. Conditional selection
5. set_index
'''

'\nTopics to be covered:\n1. Creating dataframes\n2. Dealing with rows and columns\n3. Operations: min, max, std, describe\n4. Conditional selection\n5. set_index\n'

In [163]:
# Build the dataframe by loading the weather CSV file (path is relative to the notebook's working directory)
import pandas as pd

df = pd.read_csv(filepath_or_buffer='weather_data.csv')
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [164]:
# Build the same dataframe from a dictionary instead of a file:
# every key becomes a column name and every list holds that column's values (one entry per row).
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)
df = pd.DataFrame(data=weather_data)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [165]:
# df.shape gives the shape of a dataframe as a (rows, columns) tuple.
# Unpack it to get the number of rows and the number of columns separately:

rows, columns = df.shape
rows

6

In [166]:
# Only the LAST expression in a cell is displayed, so just the final `df` shows up in the output.
df.head() # the first few rows (5 by default)
df.tail() # the last few rows (5 by default)
df[2:5] # rows at positions 2, 3 and 4 (i.e. the 3rd to 5th rows); the stop position 5 is excluded
df[:] # every row
df    # every row

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [167]:
df.columns # to print all the column headings in the dataframe

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [168]:
# to print a column: both forms below return the same 'day' column
# (only the last expression is displayed as the cell output)
df['day']
df.day

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [169]:
# find out the type of a single column: each column of a dataframe is a pandas Series
type(df['day']) 

pandas.core.series.Series

In [170]:
# to print only selected columns: pass a LIST of column names (note the double brackets);
# the result is a dataframe whose columns appear in the order listed
df[['event','day','temperature']] 

Unnamed: 0,event,day,temperature
0,Rain,1/1/2017,32
1,Sunny,1/2/2017,35
2,Snow,1/3/2017,28
3,Snow,1/4/2017,24
4,Rain,1/5/2017,32
5,Sunny,1/6/2017,31


In [171]:
# OPERATIONS (only the last line, describe(), is displayed as the cell output):
df['temperature'].max() # to find out the max value in a column
df['temperature'].mean() # to find out the mean (average) of a column
df['temperature'].std()# to find out the standard deviation of a column
df.describe() # summary statistics (count, mean, std, min, quartiles, max) for every numeric column

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [172]:
# CONDITIONAL SELECTION: comparing a column with a value gives a True/False mask (one entry per row);
# indexing the dataframe with that mask keeps only the rows where the mask is True.
# (only the last expression in the cell is displayed as the output)
is_hottest = df['temperature'] == df['temperature'].max() # True on the row(s) where temp was max

df[df['temperature'] >= 32] # to get ENTIRE ROWS where temp was 32 or higher
df[is_hottest] # it will print the ENTIRE ROW where temp was max
# .loc[rows, columns] picks the rows and the columns in ONE step; this is preferred over
# chaining two selections such as df['day'][mask]
df.loc[is_hottest, 'day'] # it will print ONLY THE DAY when temp was max
df.loc[is_hottest, ['day', 'windspeed']] # it will print DAY and WINDSPEED when temp was max
# there are a lot of operations in pandas, which can be accessed by searching pandas series operations on Google

Unnamed: 0,day,windspeed
1,1/2/2017,7


In [173]:
# BELOW ARE THE NOTES ON THE INDEX
df.index # to find out what the current index is (here the default RangeIndex: 0, 1, 2, ...)

RangeIndex(start=0, stop=6, step=1)

In [174]:
# Setting dates as index: make the 'day' column the index (the row labels) of the dataframe.
# set_index() returns a NEW dataframe, so the result is assigned back to df to keep the change.
#
# The check makes this cell safe to run AGAIN: once 'day' is the index it is no longer a
# column, and calling set_index('day') a second time would raise a KeyError.
if 'day' in df.columns:
    df = df.set_index('day')

In [175]:
# display the dataframe: 'day' now appears as the index instead of a regular column
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [176]:
#loc: used to look up the data for a particular INDEX label
df.loc['1/2/2017']
# in case the dataframe has 2 rows with the same index, both rows will be returned

temperature       35
windspeed          7
event          Sunny
Name: 1/2/2017, dtype: object

In [177]:
# to RESET INDEX: move 'day' out of the index and back into an ordinary column,
# which restores the default 0, 1, 2, ... index.
# reset_index() returns a NEW dataframe, so the result is assigned back to df to keep the change.
df = df.reset_index()

In [178]:
# display the dataframe: 'day' is a regular column again and the index is back to 0, 1, 2, ...
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny
