## Pandas .loc and .iloc

In [1]:
import pandas as pd

In [2]:
# Load the CAISO CSV into a DataFrame. No index_col is passed, so the rows get
# the default integer RangeIndex and every CSV column stays a regular column.
caiso_data = pd.read_csv('CAISO_2017to2018.csv')

In [4]:
# Preview the first 10 rows to see the column layout.
caiso_data.head(10)

Unnamed: 0.1,Unnamed: 0,GEOTHERMAL,BIOMASS,BIOGAS,SMALL HYDRO,WIND TOTAL,SOLAR PV,SOLAR THERMAL
0,2017-08-29 00:00:00,1181,340,156,324,1551,0,0
1,2017-08-29 01:00:00,1182,338,156,326,1556,0,0
2,2017-08-29 02:00:00,1183,337,156,337,1325,0,0
3,2017-08-29 03:00:00,1185,339,156,313,1158,0,0
4,2017-08-29 04:00:00,1190,344,156,320,1209,0,0
5,2017-08-29 05:00:00,1194,351,157,331,1209,0,0
6,2017-08-29 06:00:00,1196,359,155,382,1126,121,0
7,2017-08-29 07:00:00,1194,363,153,380,1164,2291,0
8,2017-08-29 08:00:00,1187,364,153,346,903,5854,66
9,2017-08-29 09:00:00,1189,367,157,356,711,7469,296


In [5]:
# Selecting with a single column label returns that column as a Series.
caiso_data['BIOGAS']

0       156
1       156
2       156
3       156
4       156
       ... 
8755    236
8756    234
8757    233
8758    234
8759    235
Name: BIOGAS, Length: 8760, dtype: int64

In [7]:
# Fetch one value by row label and column label in a single .loc lookup
# instead of chaining [] on the column Series (chained indexing).
caiso_data.loc[8759, 'BIOGAS']

235

## Anatomy of a dataframe

In [8]:
# The column labels are held in an Index object.
caiso_data.columns

Index(['Unnamed: 0', 'GEOTHERMAL', 'BIOMASS', 'BIOGAS', 'SMALL HYDRO',
       'WIND TOTAL', 'SOLAR PV', 'SOLAR THERMAL'],
      dtype='object')

In [9]:
# The row labels; here the default RangeIndex created by read_csv.
caiso_data.index

RangeIndex(start=0, stop=8760, step=1)

In [11]:
# Index objects are immutable, so assigning to a single position of .columns
# raises TypeError. The error itself is what this cell demonstrates, so it is
# caught and printed; that way "Restart Kernel -> Run All" does not stop here.
try:
    caiso_data.columns[0] = 'Date and time'
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: Index does not support mutable operations

In [12]:
# Copy the column labels into a plain Python list, which (unlike Index) is mutable.
cols = caiso_data.columns.to_list()

In [14]:
# Replace the first label (shown as 'Unnamed: 0' in the columns output) in the list copy.
cols[0] = 'Date and time'

In [15]:
# Assign the edited list back as the full set of column labels.
# This relabels caiso_data itself; the underlying data is untouched.
caiso_data.columns = cols

In [16]:
# Confirm that the first column is now labelled 'Date and time'.
caiso_data.head()

Unnamed: 0,Date and time,GEOTHERMAL,BIOMASS,BIOGAS,SMALL HYDRO,WIND TOTAL,SOLAR PV,SOLAR THERMAL
0,2017-08-29 00:00:00,1181,340,156,324,1551,0,0
1,2017-08-29 01:00:00,1182,338,156,326,1556,0,0
2,2017-08-29 02:00:00,1183,337,156,337,1325,0,0
3,2017-08-29 03:00:00,1185,339,156,313,1158,0,0
4,2017-08-29 04:00:00,1190,344,156,320,1209,0,0


In [19]:
# .iloc is purely positional and zero-based: row position 2, column position 5.
caiso_data.iloc[2,5]

1325

In [20]:
# Positional slices exclude the stop value: row positions 0-3, column positions 2-5.
caiso_data.iloc[0:4,2:6]

Unnamed: 0,BIOMASS,BIOGAS,SMALL HYDRO,WIND TOTAL
0,340,156,324,1551
1,338,156,326,1556
2,337,156,337,1325
3,339,156,313,1158


In [22]:
# Negative positions count from the end: the last 10 rows, all columns.
caiso_data.iloc[-10:,:]

Unnamed: 0,Date and time,GEOTHERMAL,BIOMASS,BIOGAS,SMALL HYDRO,WIND TOTAL,SOLAR PV,SOLAR THERMAL
8750,2018-08-28 14:00:00,933,338,240,268,2590,9437,631
8751,2018-08-28 15:00:00,933,338,238,266,2915,8680,617
8752,2018-08-28 16:00:00,934,337,239,334,3148,7376,615
8753,2018-08-28 17:00:00,934,336,235,456,3447,5490,506
8754,2018-08-28 18:00:00,955,337,237,534,3270,1992,231
8755,2018-08-28 19:00:00,962,332,236,581,3300,70,24
8756,2018-08-28 20:00:00,967,336,234,547,3468,0,17
8757,2018-08-28 21:00:00,972,336,233,502,3310,0,17
8758,2018-08-28 22:00:00,975,333,234,361,3068,0,0
8759,2018-08-28 23:00:00,977,333,235,262,2921,0,0


In [39]:
# With .iloc the slice 100:101 excludes the stop, so only row position 100 is returned.
caiso_data.iloc[100:101,2:6]

Unnamed: 0,BIOMASS,BIOGAS,SMALL HYDRO,WIND TOTAL
100,367,165,314,301


In [36]:
# With .loc, slices are by label and include BOTH endpoints: rows 100 and 101,
# and every column from 'BIOMASS' through 'WIND TOTAL'.
caiso_data.loc[100:101,'BIOMASS':'WIND TOTAL']

Unnamed: 0,BIOMASS,BIOGAS,SMALL HYDRO,WIND TOTAL
100,367,165,314,301
101,378,166,345,232


## Boolean/logical indexing

In [42]:
# Load the stacked (long-format) version of the data: one row per timestamp and
# source. index_col=0 makes the first CSV column (the timestamps) the row index.
caiso_data_stack = pd.read_csv('CAISO_2017to2018_stack.csv',index_col=0)
# Display the frame; pandas elides the middle rows of a long frame.
caiso_data_stack

Unnamed: 0,Source,MWh
2017-08-29 00:00:00,GEOTHERMAL,1181
2017-08-29 00:00:00,BIOMASS,340
2017-08-29 00:00:00,BIOGAS,156
2017-08-29 00:00:00,SMALL HYDRO,324
2017-08-29 00:00:00,WIND TOTAL,1551
...,...,...
2018-08-28 23:00:00,BIOGAS,235
2018-08-28 23:00:00,SMALL HYDRO,262
2018-08-28 23:00:00,WIND TOTAL,2921
2018-08-28 23:00:00,SOLAR PV,0


In [44]:
# The same timestamp label appears on several rows (one per source), so .loc
# returns a Series of all matching 'Source' values rather than a single scalar.
caiso_data_stack.loc['2018-08-28 23:00:00','Source']

2018-08-28 23:00:00       GEOTHERMAL
2018-08-28 23:00:00          BIOMASS
2018-08-28 23:00:00           BIOGAS
2018-08-28 23:00:00      SMALL HYDRO
2018-08-28 23:00:00       WIND TOTAL
2018-08-28 23:00:00         SOLAR PV
2018-08-28 23:00:00    SOLAR THERMAL
Name: Source, dtype: object

In [45]:
# Boolean mask aligned with the frame's index: True wherever the row's
# Source is exactly 'WIND TOTAL', False everywhere else.
wind_index = caiso_data_stack['Source'].eq('WIND TOTAL')
wind_index

2017-08-29 00:00:00    False
2017-08-29 00:00:00    False
2017-08-29 00:00:00    False
2017-08-29 00:00:00    False
2017-08-29 00:00:00     True
                       ...  
2018-08-28 23:00:00    False
2018-08-28 23:00:00    False
2018-08-28 23:00:00     True
2018-08-28 23:00:00    False
2018-08-28 23:00:00    False
Name: Source, Length: 61320, dtype: bool

In [46]:
# Boolean indexing: keep the rows where the mask is True, with all columns.
caiso_data_stack.loc[wind_index]

Unnamed: 0,Source,MWh
2017-08-29 00:00:00,WIND TOTAL,1551
2017-08-29 01:00:00,WIND TOTAL,1556
2017-08-29 02:00:00,WIND TOTAL,1325
2017-08-29 03:00:00,WIND TOTAL,1158
2017-08-29 04:00:00,WIND TOTAL,1209
...,...,...
2018-08-28 19:00:00,WIND TOTAL,3300
2018-08-28 20:00:00,WIND TOTAL,3468
2018-08-28 21:00:00,WIND TOTAL,3310
2018-08-28 22:00:00,WIND TOTAL,3068


## Merge

In [47]:
import numpy as np

In [48]:
# Load the fruit attribute table used as the right-hand side of the merges below.
fruit_info_df = pd.read_csv('fruit_info.csv')

In [49]:
# Show the whole table (it is only a few rows long).
fruit_info_df

Unnamed: 0,fruit,color,weight
0,apple,red,120
1,banana,yellow,150
2,orange,orange,250
3,raspberry,pink,15


In [51]:
# Select the 'color' column as a Series, using bracket access by label.
fruit_info_df['color']

0       red
1    yellow
2    orange
3      pink
Name: color, dtype: object

In [52]:
# Price table built by hand. The key column is named 'frut' (not 'fruit') and
# its last entry is spelled 'rasberry'; the merge cells below rely on both of
# these mismatches, so they are kept exactly as written.
price_df = pd.DataFrame(
    dict(
        price=[0.5, 0.65, 1, 0.15],
        frut=['apple', 'banana', 'orange', 'rasberry'],
    )
)

In [53]:
# Show the price table; note the 'frut' column name and the 'rasberry' spelling.
price_df

Unnamed: 0,price,frut
0,0.5,apple
1,0.65,banana
2,1.0,orange
3,0.15,rasberry


In [54]:
# With no `on`/`left_on`/`right_on`, merge joins on the column names the two
# frames share. price_df uses 'frut' while fruit_info_df uses 'fruit', so there
# are none and pandas raises MergeError. The error is the point of this cell,
# so it is caught and printed to keep "Restart Kernel -> Run All" working.
try:
    pd.merge(price_df, fruit_info_df)
except pd.errors.MergeError as err:
    print(f"{type(err).__name__}: {err}")

MergeError: No common columns to perform merge on. Merge options: left_on=None, right_on=None, left_index=False, right_index=False

In [55]:
# Join on differently named key columns. The default join is an inner join, so
# only keys present in both frames survive; the 'rasberry' / 'raspberry' rows
# do not match each other and are dropped.
price_df.merge(fruit_info_df, left_on='frut', right_on='fruit')

Unnamed: 0,price,frut,fruit,color,weight
0,0.5,apple,apple,red,120
1,0.65,banana,banana,yellow,150
2,1.0,orange,orange,orange,250


In [56]:
# Re-display the right-hand table to compare its keys against the merge result.
fruit_info_df

Unnamed: 0,fruit,color,weight
0,apple,red,120
1,banana,yellow,150
2,orange,orange,250
3,raspberry,pink,15


In [57]:
# Re-display the left-hand table; its last key is spelled 'rasberry'.
price_df

Unnamed: 0,price,frut
0,0.5,apple
1,0.65,banana
2,1.0,orange
3,0.15,rasberry


In [60]:
# Outer join: keep every key from both frames. Keys found on only one side
# get NaN in the other side's columns, which exposes the spelling mismatch.
price_df.merge(fruit_info_df, left_on='frut', right_on='fruit', how='outer')

Unnamed: 0,price,frut,fruit,color,weight
0,0.5,apple,apple,red,120.0
1,0.65,banana,banana,yellow,150.0
2,1.0,orange,orange,orange,250.0
3,0.15,rasberry,,,
4,,,raspberry,pink,15.0


In [62]:
# Correct the misspelled key with a single .loc[row_label, column_label] assignment.
# NOTE: this modifies price_df in place, so earlier displays of price_df are now stale.
price_df.loc[3,'frut']='raspberry'

In [63]:
# Repeat the outer join on the current price_df: when every key has a partner
# on the other side, the result contains no NaN-filled rows.
price_df.merge(fruit_info_df, left_on='frut', right_on='fruit', how='outer')

Unnamed: 0,price,frut,fruit,color,weight
0,0.5,apple,apple,red,120
1,0.65,banana,banana,yellow,150
2,1.0,orange,orange,orange,250
3,0.15,raspberry,raspberry,pink,15
