# <font color="purple"><h3 align="center">DataFrame Basics Tutorial</h3></font>

## **A DataFrame is the most commonly used object in pandas. It is a table-like data structure containing rows and columns, similar to an Excel spreadsheet.**

In [1]:
import pandas as pd

# Location of the sample weather data, relative to this notebook
WEATHER_CSV = "resources/weather_data.csv"

# Load the CSV into a DataFrame and preview its first rows
df = pd.read_csv(WEATHER_CSV)
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain


In [2]:
# shape is a (row_count, column_count) tuple; unpack it with: rows, columns = df.shape
df.shape

(7, 4)

## <font color='blue'>Rows</font>

In [3]:
# First 5 rows by default; pass a count to change that, e.g. df.head(3)
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain


In [4]:
# Last 5 rows by default; pass a count to change that, e.g. df.tail(2)
df.tail()

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain
5,1/6/2017,31,2,Sunny
6,1/6/2017,34,5,0


In [5]:
# Rows at positions 1 and 2 (the stop position, 3, is excluded)
df.iloc[1:3]

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow


## <font color='blue'>Columns</font>

In [6]:
# Column labels of the DataFrame, returned as an Index object
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [7]:
# Select a single column by name; df.day is the equivalent attribute-style access
df['day']

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
6    1/6/2017
Name: day, dtype: object

In [8]:
# Selecting a single column yields a pandas Series rather than a DataFrame
type(df['day'])

pandas.core.series.Series

In [9]:
# Passing a list of column names returns a DataFrame restricted to those columns
selected_columns = ['day', 'temperature', 'windspeed']
df[selected_columns]

Unnamed: 0,day,temperature,windspeed
0,1/1/2017,32,6
1,1/2/2017,-99999,7
2,1/3/2017,28,-99999
3,1/4/2017,-99999,7
4,1/5/2017,32,-99999
5,1/6/2017,31,2
6,1/6/2017,34,5


## <font color='blue'>Operations On DataFrame</font>

In [10]:
# Smallest value in the temperature column.
# NOTE(review): the -99999 entries look like missing-value placeholders rather than
# real readings, which would make this minimum meaningless — confirm against the data source.
df['temperature'].min()

-99999

In [11]:
# Boolean mask: True for every row whose temperature is above 32
above_32 = df['temperature'] > 32
df[above_32]

Unnamed: 0,day,temperature,windspeed,event
6,1/6/2017,34,5,0


In [12]:
# Day(s) on which the highest temperature was recorded — roughly SQL's
#   SELECT day FROM df WHERE temperature = (SELECT MAX(temperature) FROM df)
# A single .loc[row_mask, column] lookup replaces the chained df['day'][mask] form.
df.loc[df['temperature'] == df['temperature'].max(), 'day']

6    1/6/2017
Name: day, dtype: object

In [13]:
# Full row(s) holding the highest temperature — roughly SQL's
#   SELECT * FROM df WHERE temperature = (SELECT MAX(temperature) FROM df)
peak_temperature = df['temperature'].max()
df[df['temperature'] == peak_temperature]

Unnamed: 0,day,temperature,windspeed,event
6,1/6/2017,34,5,0


In [14]:
# Standard deviation of the temperature column.
# NOTE(review): the -99999 entries look like missing-value placeholders and
# dominate this result — confirm before interpreting the number.
df['temperature'].std()

48809.83736131575

In [15]:
# max() on a string column returns the value that sorts last ('Sunny' here);
# mean() won't work on this column since its data type is string
df['event'].max()

'Sunny'

In [16]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns;
# the string columns 'day' and 'event' are left out of the result.
# NOTE(review): mean/std/min are skewed by the -99999 entries, which look like
# missing-value placeholders — confirm.
df.describe()

Unnamed: 0,temperature,windspeed
count,7.0,7.0
mean,-28548.714286,-28567.285714
std,48809.837361,48797.150657
min,-99999.0,-99999.0
25%,-49985.5,-49998.5
50%,31.0,5.0
75%,32.0,6.5
max,34.0,7.0


**For the full list of Series operations, see the pandas Series API reference (or search for "pandas series operations"):**
https://pandas.pydata.org/docs/reference/api/pandas.Series.html

## <font color='blue'>set_index</font>

In [17]:
# Returns a new DataFrame that uses the 'day' column as its index;
# df itself is not modified by this call
df.set_index('day')

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,-99999,7,Sunny
1/3/2017,28,-99999,Snow
1/4/2017,-99999,7,0
1/5/2017,32,-99999,Rain
1/6/2017,31,2,Sunny
1/6/2017,34,5,0


In [18]:
# set_index returns a new DataFrame, so rebind df to keep 'day' as the index.
# Reassignment has the same effect here as inplace=True, without mutating the frame in place.
df = df.set_index('day')

In [19]:
# Display the frame to confirm that 'day' is now the index rather than a column
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,-99999,7,Sunny
1/3/2017,28,-99999,Snow
1/4/2017,-99999,7,0
1/5/2017,32,-99999,Rain
1/6/2017,31,2,Sunny
1/6/2017,34,5,0


In [20]:
# The index now holds the 'day' values (note that '1/6/2017' appears twice)
df.index

Index(['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017',
       '1/6/2017'],
      dtype='object', name='day')

In [21]:
# Label-based row lookup: with 'day' as the index, a date string selects its row,
# which comes back as a Series of that row's column values
df.loc['1/2/2017']

temperature    -99999
windspeed           7
event           Sunny
Name: 1/2/2017, dtype: object

In [22]:
# Move 'day' back from the index into a regular column and restore the default
# integer index. reset_index returns a new DataFrame, so rebind df instead of
# mutating it in place.
df = df.reset_index()
df.head()

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,-99999,7,Sunny
2,1/3/2017,28,-99999,Snow
3,1/4/2017,-99999,7,0
4,1/5/2017,32,-99999,Rain


In [23]:
# Index the rows by 'event' so they can be looked up by label with df.loc[...],
# loosely like using 'event' as the key of a hash map. Unlike a hash map, index
# labels may repeat ('Rain', 'Sunny' and '0' each appear twice here), and df.loc
# returns every row that carries the requested label.
df = df.set_index('event')
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,-99999,7
Snow,1/3/2017,28,-99999
0,1/4/2017,-99999,7
Rain,1/5/2017,32,-99999
Sunny,1/6/2017,31,2
0,1/6/2017,34,5


In [24]:
# 'Snow' labels exactly one row, so the lookup yields that row as a Series
df.loc['Snow']

day            1/3/2017
temperature          28
windspeed        -99999
Name: Snow, dtype: object