In [2]:
import pandas as pd
import numpy as np

# Load weather.csv (path is relative to the current working directory) into a DataFrame
data = pd.read_csv('weather.csv')

In [7]:
# Preview the first two rows of the frame

data.head(2)

Unnamed: 0,day,temperature,windspeed,event
0,01/01/2017,32,6,Rain
1,01/02/2017,35,7,Sunny


In [10]:
# Select a single column by its label; the result is a Series
data['day']

0    01/01/2017
1    01/02/2017
2    01/03/2017
3    01/04/2017
4    01/05/2017
5    01/06/2017
Name: day, dtype: object

In [12]:
# Positional row slice: rows 0, 1 and 2 (the stop position 3 is excluded)
data.iloc[:3]

Unnamed: 0,day,temperature,windspeed,event
0,01/01/2017,32,6,Rain
1,01/02/2017,35,7,Sunny
2,01/03/2017,28,2,Snow


In [18]:
# Work on an independent copy so `data` itself is left untouched,
# then overwrite every column of the first five rows with 0.
dataCopy = data.copy()
dataCopy.iloc[:5] = 0
dataCopy

Unnamed: 0,day,temperature,windspeed,event
0,0,0,0,0
1,0,0,0,0
2,0,0,0,0
3,0,0,0,0
4,0,0,0,0
5,01/06/2017,32,2,Sunny


In [32]:
# Show the row index and the column labels together.
# Only the last expression of a cell is rendered, so both are returned as a
# single tuple instead of two separate statements.
data.index, data.columns

Index(['day', 'temperature', 'windspeed', 'event'], dtype='object')

In [33]:
# Convert the frame to a NumPy array; the mixed string/integer columns give dtype=object
data.to_numpy()

array([['01/01/2017', 32, 6, 'Rain'],
       ['01/02/2017', 35, 7, 'Sunny'],
       ['01/03/2017', 28, 2, 'Snow'],
       ['01/04/2017', 24, 7, 'Snow'],
       ['01/05/2017', 32, 4, 'Rain'],
       ['01/06/2017', 32, 2, 'Sunny']], dtype=object)

In [35]:
# Per-column dtypes of the frame
data.dtypes

day            object
temperature     int64
windspeed       int64
event          object
dtype: object

In [37]:
# Build a four-row frame in which every column has a different dtype.
# Scalar entries ('A', 'B', 'F') are repeated for each row; the row labels
# come from the explicitly indexed Series in column 'C'.
df2 = pd.DataFrame(
    {
        'A': 1.,
        'B': pd.Timestamp('20130102'),
        'C': pd.Series(1, index=list(range(4)), dtype='float32'),
        'D': np.full(4, 3, dtype='int32'),
        'E': pd.Categorical(["test", "train"] * 2),
        'F': 'foo',
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-02,1.0,3,test,foo
1,1.0,2013-01-02,1.0,3,train,foo
2,1.0,2013-01-02,1.0,3,test,foo
3,1.0,2013-01-02,1.0,3,train,foo


In [38]:
# Row labels of df2
df2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [40]:
# Quick statistical summary (count, mean, std, min, quartiles, max);
# only the numeric columns appear in the result
data.describe()

Unnamed: 0,temperature,windspeed
count,6.0,6.0
mean,30.5,4.666667
std,3.885872,2.33809
min,24.0,2.0
25%,29.0,2.5
50%,32.0,5.0
75%,32.0,6.75
max,35.0,7.0


In [42]:
# Keep only the rows whose temperature is strictly above 25
data.loc[data['temperature'] > 25]


Unnamed: 0,day,temperature,windspeed,event
0,01/01/2017,32,6,Rain
1,01/02/2017,35,7,Sunny
2,01/03/2017,28,2,Snow
4,01/05/2017,32,4,Rain
5,01/06/2017,32,2,Sunny


In [5]:
# Add a derived column: element-wise product of temperature and windspeed.
# This modifies `data` in place; the cells below read the new column.
data['wind*temp'] = data['temperature'] * data['windspeed']
data

Unnamed: 0,day,temperature,windspeed,event,wind*temp
0,01/01/2017,32,6,Rain,192
1,01/02/2017,35,7,Sunny,245
2,01/03/2017,28,2,Snow,56
3,01/04/2017,24,7,Snow,168
4,01/05/2017,32,4,Rain,128
5,01/06/2017,32,2,Sunny,64


In [3]:
# Display the whole frame.
# NOTE(review): the saved output of this cell has no 'wind*temp' column although the
# cell above adds it (execution count 3 here vs 5 there) — looks stale; re-run top to bottom.
data

Unnamed: 0,day,temperature,windspeed,event
0,01/01/2017,32,6,Rain
1,01/02/2017,35,7,Sunny
2,01/03/2017,28,2,Snow
3,01/04/2017,24,7,Snow
4,01/05/2017,32,4,Rain
5,01/06/2017,32,2,Sunny


In [10]:
# Mean of each numeric column.
# numeric_only=True is stated explicitly: pandas >= 2.0 no longer drops the
# string columns ('day', 'event') silently and raises an error without it.
data.mean(numeric_only=True)

temperature     30.500000
windspeed        4.666667
wind*temp      142.166667
dtype: float64

In [27]:
# Lower-case every value of the 'event' column via the vectorised .str accessor
data['event'].str.lower()

0     rain
1    sunny
2     snow
3     snow
4     rain
5    sunny
Name: event, dtype: object

In [30]:
# Split the frame into three row-wise pieces (rows 0-1, row 2, rows 3 onward),
# then stack them back together in list order with pd.concat.
dataPieces = [data[:2], data[2:3], data[3:]]
pd.concat(dataPieces)

Unnamed: 0,day,temperature,windspeed,event,wind*temp
0,01/01/2017,32,6,Rain,192
1,01/02/2017,35,7,Sunny,245
2,01/03/2017,28,2,Snow,56
3,01/04/2017,24,7,Snow,168
4,01/05/2017,32,4,Rain,128
5,01/06/2017,32,2,Sunny,64


In [35]:
# Merge the columns of two different DataFrames on a shared key column.
# Both frames contain the key 'foo' twice, so the join is many-to-many and
# produces every left/right pairing (2 x 2 = 4 rows).
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})

# Only the last expression of a cell is rendered, so the result is bound to a
# name and placed on the final line.
merged = pd.merge(left, right, on='key')
merged

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [37]:
# Append a copy of row 2 to the end of the frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so the row
# (a Series) is turned into a one-row frame and combined with pd.concat.
s = data.iloc[2]
# to_frame().T yields object-dtype columns; infer_objects() restores the
# numeric dtypes so the concatenated columns are not upcast to object.
pd.concat([data, s.to_frame().T.infer_objects()], ignore_index=True)

Unnamed: 0,day,temperature,windspeed,event,wind*temp
0,01/01/2017,32,6,Rain,192
1,01/02/2017,35,7,Sunny,245
2,01/03/2017,28,2,Snow,56
3,01/04/2017,24,7,Snow,168
4,01/05/2017,32,4,Rain,128
5,01/06/2017,32,2,Sunny,64
6,01/03/2017,28,2,Snow,56


In [38]:
# Group the rows by their 'event' label and total the numeric columns per group.
# numeric_only=True keeps the string column 'day' out of the aggregation;
# pandas >= 2.0 would otherwise concatenate its strings into the result.
data.groupby('event').sum(numeric_only=True)

Unnamed: 0_level_0,temperature,windspeed,wind*temp
event,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,64,10,320
Snow,52,9,224
Sunny,67,9,309


In [40]:
# Group by several keys at once: one result row per (event, temperature) pair,
# indexed by a two-level MultiIndex.
# numeric_only=True keeps the string column 'day' out of the aggregation;
# pandas >= 2.0 would otherwise concatenate its strings into the result.
data.groupby(['event', 'temperature']).sum(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,windspeed,wind*temp
event,temperature,Unnamed: 2_level_1,Unnamed: 3_level_1
Rain,32,10,320
Snow,24,7,168
Snow,28,2,56
Sunny,32,2,64
Sunny,35,7,245
