# <font color="purple"><h3 align="center">DataFrame Basics</h3></font>

## **The DataFrame is the most commonly used object in pandas. It is a table-like data structure made up of rows and columns, similar to an Excel spreadsheet.**

In [2]:
import pandas as pd

In [13]:
# Sample data set: one key per column, one list entry per day (six rows in total).
weather_data = dict(
    day=['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'],
    temperature=[32, 35, 28, 24, 32, 31],
    windspeed=[6, 7, 2, 7, 4, 2],
    event=['Rain', 'Sunny', 'Snow', 'Snow', 'Rain', 'Sunny'],
)
# The dict keys become the column names of the resulting DataFrame.
df = pd.DataFrame(data=weather_data)

In [4]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [5]:
df.dtypes

day            object
temperature     int64
windspeed       int64
event          object
dtype: object

In [6]:
df.ndim

2

In [7]:
df.shape # (rows, columns) tuple; unpack it with: rows, columns = df.shape

(6, 4)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   day          6 non-null      object
 1   temperature  6 non-null      int64 
 2   windspeed    6 non-null      int64 
 3   event        6 non-null      object
dtypes: int64(2), object(2)
memory usage: 320.0+ bytes


In [9]:
df.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.333333,4.666667
std,3.829708,2.33809
min,24.0,2.0
25%,28.75,2.5
50%,31.5,5.0
75%,32.0,6.75
max,35.0,7.0


In [10]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


## <font color='blue'>Rows</font>

In [11]:
df.head(3) # show only the first 3 rows

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


In [12]:
df.tail(4) # show only the last 4 rows

Unnamed: 0,day,temperature,windspeed,event
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [14]:
df[1:3] # slicing with [] selects rows by position; the stop (3) is excluded, so rows 1 and 2 are returned

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow


## <font color='blue'>Columns</font>

In [15]:
df.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [16]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [17]:
df['day'] # or df.day

0    1/1/2017
1    1/2/2017
2    1/3/2017
3    1/4/2017
4    1/5/2017
5    1/6/2017
Name: day, dtype: object

In [18]:
type(df['day'])

pandas.core.series.Series

In [19]:
df[['temperature','day']]

Unnamed: 0,temperature,day
0,32,1/1/2017
1,35,1/2/2017
2,28,1/3/2017
3,24,1/4/2017
4,32,1/5/2017
5,31,1/6/2017


## <font color='blue'>Operations On DataFrame</font>

In [21]:
df['temperature'].max()

35

In [22]:
# Rows where temperature exceeds 32, restricted to two columns. Row mask and column
# labels are passed to a single .loc call instead of chaining two [] lookups.
df.loc[df['temperature'] > 32, ['temperature', 'windspeed']]

Unnamed: 0,temperature,windspeed
1,35,7


In [23]:
df['event']

0     Rain
1    Sunny
2     Snow
3     Snow
4     Rain
5    Sunny
Name: event, dtype: object

In [24]:
df['event'].unique()

array(['Rain', 'Sunny', 'Snow'], dtype=object)

In [25]:
# Day(s) on which the maximum temperature was recorded. The boolean mask selects the
# rows and 'day' selects the column in one .loc call (comparable to SQL's SELECT ... WHERE).
df.loc[df['temperature'] == df['temperature'].max(), 'day']

1    1/2/2017
Name: day, dtype: object

In [26]:
df[df['temperature'] == df['temperature'].max()] # boolean mask keeps only the row(s) at the maximum temperature, much like a SQL WHERE clause

Unnamed: 0,day,temperature,windspeed,event
1,1/2/2017,35,7,Sunny


In [27]:
df['temperature'].std()

3.8297084310253524

In [28]:
df['temperature'].max() # temperature is int64, so numeric reductions such as mean() work here too; mean() is not defined for string columns like 'event'

35

**See the pandas `Series` API reference for the full list of available operations:**
https://pandas.pydata.org/docs/reference/api/pandas.Series.html

## <font color='blue'>Set and Reset Index</font>

In [29]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [30]:
df.set_index('day') # returns a new DataFrame indexed by 'day'; df itself is not modified, as the next cell shows

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [31]:
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [32]:
df.set_index('day', inplace=True) # inplace=True modifies df itself; the earlier call without it only returned a new frame and left df unchanged

In [33]:
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [34]:
df.iloc[2]

temperature      28
windspeed         2
event          Snow
Name: 1/3/2017, dtype: object

In [35]:
df

Unnamed: 0_level_0,temperature,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1/1/2017,32,6,Rain
1/2/2017,35,7,Sunny
1/3/2017,28,2,Snow
1/4/2017,24,7,Snow
1/5/2017,32,4,Rain
1/6/2017,31,2,Sunny


In [36]:
df.index

Index(['1/1/2017', '1/2/2017', '1/3/2017', '1/4/2017', '1/5/2017', '1/6/2017'], dtype='object', name='day')

In [37]:
# Narrow to the two columns by label first, then take the rows at positions 2, 3 and 4.
df[['windspeed', 'event']].iloc[2:5]

Unnamed: 0_level_0,windspeed,event
day,Unnamed: 1_level_1,Unnamed: 2_level_1
1/3/2017,2,Snow
1/4/2017,7,Snow
1/5/2017,4,Rain


In [38]:
# Row label and column labels go into one .loc call rather than two chained lookups.
df.loc['1/2/2017', ['event', 'temperature']]

event          Sunny
temperature       35
Name: 1/2/2017, dtype: object

In [39]:
# Move 'day' back from the index into an ordinary column and restore the integer index.
# Reassigning the result is used instead of inplace=True.
df = df.reset_index()
df

Unnamed: 0,day,temperature,windspeed,event
0,1/1/2017,32,6,Rain
1,1/2/2017,35,7,Sunny
2,1/3/2017,28,2,Snow
3,1/4/2017,24,7,Snow
4,1/5/2017,32,4,Rain
5,1/6/2017,31,2,Sunny


In [40]:
# Index the rows by event so they can be looked up by label, e.g. df.loc['Snow'].
# Unlike the keys of a hash map, index labels may repeat: each event occurs twice here,
# so a label lookup can return several rows.
df = df.set_index('event')
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [41]:
df.loc['Snow',['temperature','day']]

Unnamed: 0_level_0,temperature,day
event,Unnamed: 1_level_1,Unnamed: 2_level_1
Snow,28,1/3/2017
Snow,24,1/4/2017


In [None]:
# Pick the 'temperature' column by label, then the entries at positions 1 and 2.
df['temperature'].iloc[1:3]

In [None]:
df.iloc[:,0:2]

In [42]:
df.T # df.transpose()

event,Rain,Sunny,Snow,Snow.1,Rain.1,Sunny.1
day,1/1/2017,1/2/2017,1/3/2017,1/4/2017,1/5/2017,1/6/2017
temperature,32,35,28,24,32,31
windspeed,6,7,2,7,4,2


In [44]:
# Turn the 'event' index back into a regular column (reassignment instead of inplace=True).
df = df.reset_index()

In [45]:
df

Unnamed: 0,event,day,temperature,windspeed
0,Rain,1/1/2017,32,6
1,Sunny,1/2/2017,35,7
2,Snow,1/3/2017,28,2
3,Snow,1/4/2017,24,7
4,Rain,1/5/2017,32,4
5,Sunny,1/6/2017,31,2


##  <font color='blue'>Sorting Dataframe by Index and Values</font>

In [46]:
# Use 'event' as the index so the frame can be sorted by its index labels below.
df = df.set_index('event')

In [47]:
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/5/2017,32,4
Sunny,1/6/2017,31,2


In [48]:
# Sort the rows by index label in descending order (Sunny, Snow, Rain).
df = df.sort_index(ascending=False)

In [49]:
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sunny,1/2/2017,35,7
Sunny,1/6/2017,31,2
Snow,1/3/2017,28,2
Snow,1/4/2017,24,7
Rain,1/1/2017,32,6
Rain,1/5/2017,32,4


In [50]:
# Sort the rows by temperature, lowest first. axis=0 (sort rows) was passed explicitly
# before but is the default, so it is omitted.
df = df.sort_values(by='temperature')

In [51]:
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,1/4/2017,24,7
Snow,1/3/2017,28,2
Sunny,1/6/2017,31,2
Rain,1/1/2017,32,6
Rain,1/5/2017,32,4
Sunny,1/2/2017,35,7


In [52]:
# Sort by temperature and break ties using windspeed (both ascending).
df = df.sort_values(by=['temperature', 'windspeed'])

In [53]:
df

Unnamed: 0_level_0,day,temperature,windspeed
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Snow,1/4/2017,24,7
Snow,1/3/2017,28,2
Sunny,1/6/2017,31,2
Rain,1/5/2017,32,4
Rain,1/1/2017,32,6
Sunny,1/2/2017,35,7


In [54]:
# Move 'event' back into a column; the rows keep their sorted order under a fresh integer index.
df = df.reset_index()

In [55]:
df

Unnamed: 0,event,day,temperature,windspeed
0,Snow,1/4/2017,24,7
1,Snow,1/3/2017,28,2
2,Sunny,1/6/2017,31,2
3,Rain,1/5/2017,32,4
4,Rain,1/1/2017,32,6
5,Sunny,1/2/2017,35,7


`sort_values` sorts by one or more columns (pass a list of column names to break ties using the later columns), while `sort_index` sorts by the index labels.