# Introduction to Pandas DataFrames

In [None]:
import pandas as pd

In [None]:
# Six days of sample weather observations, stored column-wise:
# every key is a column name and every list holds that column's values
# (all lists have the same length, one entry per day).
weather_data = {
    "day": [
        "1/1/2017", "1/2/2017", "1/3/2017",
        "1/4/2017", "1/5/2017", "1/6/2017",
    ],
    "temperature": [32, 35, 28, 24, 32, 31],
    "windspeed": [6, 7, 2, 7, 4, 2],
    "event": ["Rain", "Sunny", "Snow", "Snow", "Rain", "Sunny"],
}

### Display data (first, last, sample, column titles)

In [None]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values. Rows get a default integer index (0..5).
df = pd.DataFrame(weather_data)

In [None]:
# First 2 rows of the DataFrame.
df.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny


In [None]:
# Last 3 rows of the DataFrame.
df.tail(3)

Unnamed: 0,day,temperature,windspeed,event
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [None]:
# 3 rows drawn at random (without replacement). No seed is given, so the rows
# shown will differ between runs; pass random_state=<int> to make the draw
# reproducible.
df.sample(3)

Unnamed: 0,day,temperature,windspeed,event
5,1/6/2017,31,2,Sunny
0,1/1/2017,32,6,Rain
2,1/3/2017,28,2,Snow


In [None]:
# Column labels, returned as a pandas Index object.
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [None]:
# Data type of each column. Note that 'day' is reported as object: the dates
# are held as plain strings here, not as datetime values.
df.dtypes

day            object
temperature     int64
windspeed       int64
event          object
dtype: object

### Index (change, inplace, reset)

In [None]:
# Make the 'day' column the row index. With inplace=True the existing df
# object is modified directly and nothing is returned; without it, set_index
# would return a new DataFrame and leave df unchanged.
# Caution: this cell is not re-runnable as-is. After it has run once, 'day' is
# no longer a column, so running it a second time raises a KeyError.
df.set_index('day',inplace=True)

In [None]:
# 'day' is now the index (row labels) rather than a regular column.
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [None]:
# Undo the previous step: move the 'day' index back into an ordinary column
# and restore the default integer index, again modifying df in place.
# Caution: re-running this cell when df already has the default index would
# insert that index as an extra column named 'index'.
df.reset_index(inplace=True)
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


### Selecting a subset of the DataFrame (one or multiple columns)

In [None]:
# Bracket selection with a single column name returns that column as a Series.
df['temperature']

0    32
1    35
2    28
3    24
4    32
5    31
Name: temperature, dtype: int64

In [None]:
# Attribute-style access: same result as df['temperature'].
# It only works when the column name is a valid Python identifier and does not
# clash with an existing DataFrame attribute or method; bracket selection
# always works.
df.temperature

0    32
1    35
2    28
3    24
4    32
5    31
Name: temperature, dtype: int64

In [None]:
# Passing a list of column names (note the double brackets) returns a
# DataFrame containing just those columns, in the order listed.
df[['temperature','event']]

Unnamed: 0,temperature,event
0,32,Rain
1,35,Sunny
2,28,Snow
3,24,Snow
4,32,Rain
5,31,Sunny


In [None]:
# dg is a Series (one column), not a DataFrame -- see "Name: temperature" in
# the output. It is only a new name for the column selected from df.
# NOTE(review): whether dg shares data with df depends on the pandas version
# and copy-on-write settings; use df.temperature.copy() when a guaranteed
# independent copy is needed.
dg = df.temperature
dg

0    32
1    35
2    28
3    24
4    32
5    31
Name: temperature, dtype: int64

### Info about the whole DataFrame (info, describe, value_counts)

In [None]:
# Structural summary: index range, column names, non-null counts, dtypes and
# approximate memory usage. info() prints its report instead of returning it.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   day          6 non-null      object
 1   temperature  6 non-null      int64 
 2   windspeed    6 non-null      int64 
 3   event        6 non-null      object
dtypes: int64(2), object(2)
memory usage: 320.0+ bytes


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max). By default only
# the numeric columns are included, so 'day' and 'event' do not appear.
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [None]:
# Number of rows for each distinct value in the 'event' column.
df['event'].value_counts()

Snow     2
Sunny    2
Rain     2
Name: event, dtype: int64

### Deleting columns (inplace True/False)

In [None]:
# Drop the 'event' column (axis=1 means columns; axis=0 would mean rows).
# inplace is left at its default of False, so this returns a new DataFrame
# for display and df itself is left unchanged.
df.drop('event',axis = 1)

Unnamed: 0,day,temperature,windspeed
0,1/1/2017,32,6
1,1/2/2017,35,7
2,1/3/2017,28,2
3,1/4/2017,24,7
4,1/5/2017,32,4
5,1/6/2017,31,2


In [None]:
# df still contains 'event': the drop above was not in place and its result
# was not assigned back to df.
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny
