# DataFrame Basics

In [27]:
import pandas as pd
# Load the weather CSV into a DataFrame and show it as the cell output.
csv_path = '2_weather_data.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [3]:
# pd.DataFrame() also accepts a plain Python dict that maps each column name
# to the list of values for that column.
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017'],
    temperature=[32, 35, 28],
    windspeed=[6, 7, 2],
    event=['Rain', 'Sunny', 'Snow'],
)
df_dict = pd.DataFrame(data=weather_data)
df_dict

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


### How to view your data

In [4]:
# head() shows the first rows of the frame; n is 5 by default and can be changed
df.head(n=5)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [6]:
# tail() mirrors head(), but shows the last rows of the frame (here the last 2)
df.tail(n=2)

Unnamed: 0,day,temperature,windspeed,event
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [7]:
# rows can be sliced by position just like list items: this shows rows 2, 3 and 4
df.iloc[2:5]

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain


In [8]:
# .columns holds the column labels of the frame as an Index object
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [9]:
# bracket access returns a single column as a Series; df.event is the attribute-style shorthand
df['event']

0     Rain
1    Sunny
2     Snow
3     Snow
4     Rain
5    Sunny
Name: event, dtype: object

In [13]:
# passing a list of column names inside df[...] selects just those columns
wanted_columns = ['day', 'temperature', 'event']
df[wanted_columns]

Unnamed: 0,day,temperature,event
0,1/1/2017,32,Rain
1,1/2/2017,35,Sunny
2,1/3/2017,28,Snow
3,1/4/2017,24,Snow
4,1/5/2017,32,Rain
5,1/6/2017,31,Sunny


### Operations with data

#### See the pandas Series documentation for the full list of available operations

In [15]:
# Individual statistics are also available per column, e.g. df['column'].max(),
# and likewise .min(), .mean() and .std().
df.describe()  # describe() summarizes every numeric column: count, mean, std, min, quartiles, max

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


### Conditionally select your data

In [16]:
# build a boolean mask, then keep only the rows where temperature >= 32
at_least_32 = df['temperature'] >= 32
df[at_least_32]

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
4,1/5/2017,32,4,Rain


In [18]:
# find the highest temperature first, then keep the row(s) that match it
highest_temperature = df['temperature'].max()
df[df['temperature'] == highest_temperature]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [19]:
# Show only the 'day' column for the row(s) with the maximum temperature.
# A single .loc[row_mask, column] lookup replaces the chained df['day'][mask]
# form: it selects rows and column in one indexing step.
df.loc[df['temperature'] == df['temperature'].max(), 'day']

1    1/2/2017
Name: day, dtype: object

In [20]:
# Show several columns for the row(s) with the maximum temperature.
# .loc[row_mask, [columns]] selects rows and columns in one indexing step
# instead of chaining df[[...]][mask].
df.loc[df['temperature'] == df['temperature'].max(), ['day', 'temperature']]

Unnamed: 0,day,temperature
1,1/2/2017,35


### set_index() and reset_index() methods

In [21]:
# .index holds the row labels; here it is the default RangeIndex
df.index

RangeIndex(start=0, stop=6, step=1)

In [28]:
# Make the 'day' column the index. set_index() returns a new DataFrame, so
# rebind df to the result rather than mutating the frame with inplace=True.
df = df.set_index('day')
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [29]:
# with 'day' as the index, a row can be looked up directly by its date label
target_day = '1/3/2017'
df.loc[target_day]

temperature      28
windspeed         2
event          Snow
Name: 1/3/2017, dtype: object

In [30]:
# Restore the default integer index; the current index ('day') becomes a
# regular column again. reset_index() returns a new DataFrame, so rebind df
# to the result rather than mutating the frame with inplace=True.
df = df.reset_index()
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny
