## Pandas practice notebook

In [37]:
## Pandas practice notebook
import pandas as pd
import numpy as np


In [39]:
# Reading a dataset
data = pd.read_csv("train.csv")
data.head(6)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000
5,6,50,RL,85.0,14115,Pave,,IR1,Lvl,AllPub,...,0,,MnPrv,Shed,700,10,2009,WD,Normal,143000


#### Creating a series from various objects 

In [40]:
series1 = pd.Series(data.Id[0:5])
dic  = {'a':'abhi', 'r':'ritika'}
series2 = pd.Series(dic, index=['b', 'c','r', 'd', 'a'])

arr = np.array([1, 2, 3])
series3 = pd.Series(arr)

print('series1')
print(series1)
print('series2')
print(series2)
print('series3')
print(series3)

series1
0    1
1    2
2    3
3    4
4    5
Name: Id, dtype: int64
series2
b       NaN
c       NaN
r    ritika
d       NaN
a      abhi
dtype: object
series3
0    1
1    2
2    3
dtype: int32


### NaN (not a number) is the standard missing data marker used in pandas.

#### .dtype is used to find datatypes of a series or variable or dataframes columns

In [41]:
series3.dtype

dtype('int32')

In [42]:
print(data.Id.dtype)
print(data.Alley.dtype)

int64
object


#### datatype of String is objects

### Series Operations

#### Series have a name attribute

In [43]:
s1 = pd.Series([1,2,3,4,5], name = 'abhi')
s2 = pd.Series([5,4,3,2,1], name = 'riti')
print(s1.name)
print(s2.name)
print(s1+s2)
print(s1-s2)

# e raised to power elements in series
print(np.exp(s1))

# Missing values in operation make Nan
print(s1[1:]+s2[:-1])


abhi
riti
0    6
1    6
2    6
3    6
4    6
dtype: int64
0   -4
1   -2
2    0
3    2
4    4
dtype: int64
0      2.718282
1      7.389056
2     20.085537
3     54.598150
4    148.413159
Name: abhi, dtype: float64
0    NaN
1    6.0
2    6.0
3    6.0
4    NaN
dtype: float64


### DataFrames

DataFrame accepts many different kinds of input:

- Dict of 1D ndarrays, lists, dicts, or Series
- 2-D numpy.ndarray
- Structured or record ndarray
- A Series
- Another DataFrame

In [44]:
dic = {'name':['abhi', 'riti'], 'gender':['male', 'female']}
print(pd.DataFrame(dic))
print(' ')
dic = {'name':{1: 'abhi',2: 'riti'}, 'gender':{1:'male', 2:'female'}}
print(pd.DataFrame(dic))
print(' ')
arr = np.array([[1, 2, 3],[4,5,6]])
print(pd.DataFrame(arr,columns =['a','b','c'], index = [1,2]))
print(' ')
print(data[1:3])

   name  gender
0  abhi    male
1  riti  female
 
   name  gender
1  abhi    male
2  riti  female
 
   a  b  c
1  1  2  3
2  4  5  6
 
   Id  MSSubClass MSZoning  \
1   2          20       RL   
2   3          60       RL   

   LotFrontage  LotArea Street Alley  \
1         80.0     9600   Pave   NaN   
2         68.0    11250   Pave   NaN   

  LotShape LandContour Utilities  ...  \
1      Reg         Lvl    AllPub  ...   
2      IR1         Lvl    AllPub  ...   

  PoolArea PoolQC Fence MiscFeature  \
1        0    NaN   NaN         NaN   
2        0    NaN   NaN         NaN   

  MiscVal MoSold YrSold  SaleType  \
1       0      5   2007        WD   
2       0      9   2008        WD   

   SaleCondition  SalePrice  
1         Normal     181500  
2         Normal     223500  

[2 rows x 81 columns]


### Column selection, addition and deletion

In [45]:
data1= data['Id'][:5]
data1

0    1
1    2
2    3
3    4
4    5
Name: Id, dtype: int64

In [46]:
#data.pop('Id')
data['Id2'] = data.Id+1
data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,,,,0,2,2008,WD,Normal,208500,2
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,,,,0,2,2006,WD,Abnorml,140000,5
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,,,,0,12,2008,WD,Normal,250000,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,60,RL,62.0,7917,Pave,,Reg,Lvl,AllPub,...,,,,0,8,2007,WD,Normal,175000,1457
1456,1457,20,RL,85.0,13175,Pave,,Reg,Lvl,AllPub,...,,MnPrv,,0,2,2010,WD,Normal,210000,1458
1457,1458,70,RL,66.0,9042,Pave,,Reg,Lvl,AllPub,...,,GdPrv,Shed,2500,5,2010,WD,Normal,266500,1459
1458,1459,20,RL,68.0,9717,Pave,,Reg,Lvl,AllPub,...,,,,0,4,2010,WD,Normal,142125,1460


In [47]:
data.assign(ID2=lambda x: (x['Id']+2 )).head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2,ID2
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,,,0,2,2008,WD,Normal,208500,2,3
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,,,0,5,2007,WD,Normal,181500,3,4
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,,,0,9,2008,WD,Normal,223500,4,5
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,,,0,2,2006,WD,Abnorml,140000,5,6
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,,,0,12,2008,WD,Normal,250000,6,7


### Indexing and Selection

In [48]:
data[1:3]

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4


In [49]:
data['LotShape'][11:13] ##returns series

11    IR1
12    IR2
Name: LotShape, dtype: object

In [50]:
data[2:3]

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4


In [51]:
data[['Id','Alley']][2:4]

Unnamed: 0,Id,Alley
2,3,
3,4,


In [52]:
#creating a filter
filtr = (data.Id == 2)

In [53]:
data[filtr]

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3


In [54]:
### Based on row and column location
data.iloc[[0,1,2,3],[0,1,2,4,5]]

Unnamed: 0,Id,MSSubClass,MSZoning,LotArea,Street
0,1,60,RL,8450,Pave
1,2,20,RL,9600,Pave
2,3,60,RL,11250,Pave
3,4,70,RL,9550,Pave


In [55]:
data.loc[0:3,'Id':'Alley']

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley
0,1,60,RL,65.0,8450,Pave,
1,2,20,RL,80.0,9600,Pave,
2,3,60,RL,68.0,11250,Pave,
3,4,70,RL,60.0,9550,Pave,


In [56]:
data.loc[data.Id%2==0,'Id':'Alley'][:6]

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley
1,2,20,RL,80.0,9600,Pave,
3,4,70,RL,60.0,9550,Pave,
5,6,50,RL,85.0,14115,Pave,
7,8,60,RL,,10382,Pave,
9,10,190,RL,50.0,7420,Pave,
11,12,60,RL,85.0,11924,Pave,


In [68]:
data.loc[data.Id%2==0,'MSSubClass':'Alley'] = np.nan
data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
0,1,60,RL,65,8450,Pave,,Reg,Lvl,AllPub,...,,,,0,2,2008,WD,Normal,208500,2
1,2,,,,,,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3
2,3,60,RL,68,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4
3,4,,,,,,,IR1,Lvl,AllPub,...,,,,0,2,2006,WD,Abnorml,140000,5
4,5,60,RL,84,14260,Pave,,IR1,Lvl,AllPub,...,,,,0,12,2008,WD,Normal,250000,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,,,,,,,Reg,Lvl,AllPub,...,,,,0,8,2007,WD,Normal,175000,1457
1456,1457,20,RL,85,13175,Pave,,Reg,Lvl,AllPub,...,,MnPrv,,0,2,2010,WD,Normal,210000,1458
1457,1458,,,,,,,Reg,Lvl,AllPub,...,,GdPrv,Shed,2500,5,2010,WD,Normal,266500,1459
1458,1459,20,RL,68,9717,Pave,,Reg,Lvl,AllPub,...,,,,0,4,2010,WD,Normal,142125,1460


### Transposing

In [58]:
data.loc[1:3,'Id':'MSZoning'].T

Unnamed: 0,1,2,3
Id,2.0,3,4.0
MSSubClass,,60,
MSZoning,,RL,


In [59]:
data.iloc[:,[2,3,4,5]].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   MSZoning     1460 non-null   object
 1   LotFrontage  1326 non-null   object
 2   LotArea      1460 non-null   object
 3   Street       1460 non-null   object
dtypes: object(4)
memory usage: 45.8+ KB


In [60]:
pd.set_option('display.width', 40)
data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
0,1,60,RL,65,8450,Pave,,Reg,Lvl,AllPub,...,,,,0,2,2008,WD,Normal,208500,2
1,2,,,,,,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3
2,3,60,RL,68,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4
3,4,,,,,,,IR1,Lvl,AllPub,...,,,,0,2,2006,WD,Abnorml,140000,5
4,5,60,RL,84,14260,Pave,,IR1,Lvl,AllPub,...,,,,0,12,2008,WD,Normal,250000,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,,,,,,,Reg,Lvl,AllPub,...,,,,0,8,2007,WD,Normal,175000,1457
1456,1457,20,RL,85,13175,Pave,,Reg,Lvl,AllPub,...,,MnPrv,,0,2,2010,WD,Normal,210000,1458
1457,1458,,,,,,,Reg,Lvl,AllPub,...,,GdPrv,Shed,2500,5,2010,WD,Normal,266500,1459
1458,1459,20,RL,68,9717,Pave,,Reg,Lvl,AllPub,...,,,,0,4,2010,WD,Normal,142125,1460


In [61]:
pd.set_option('display.max_rows', 10)
data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
0,1,60,RL,65,8450,Pave,,Reg,Lvl,AllPub,...,,,,0,2,2008,WD,Normal,208500,2
1,2,,,,,,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3
2,3,60,RL,68,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4
3,4,,,,,,,IR1,Lvl,AllPub,...,,,,0,2,2006,WD,Abnorml,140000,5
4,5,60,RL,84,14260,Pave,,IR1,Lvl,AllPub,...,,,,0,12,2008,WD,Normal,250000,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,,,,,,,Reg,Lvl,AllPub,...,,,,0,8,2007,WD,Normal,175000,1457
1456,1457,20,RL,85,13175,Pave,,Reg,Lvl,AllPub,...,,MnPrv,,0,2,2010,WD,Normal,210000,1458
1457,1458,,,,,,,Reg,Lvl,AllPub,...,,GdPrv,Shed,2500,5,2010,WD,Normal,266500,1459
1458,1459,20,RL,68,9717,Pave,,Reg,Lvl,AllPub,...,,,,0,4,2010,WD,Normal,142125,1460


In [62]:
df1 = data.loc[:,'Id':'Alley'][:6]
df2 = data.loc[:,'Id':'Alley'][6:10]

pd.concat([df1,df2], axis = 0,ignore_index=True)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley
0,1,60.0,RL,65.0,8450.0,Pave,
1,2,,,,,,
2,3,60.0,RL,68.0,11250.0,Pave,
3,4,,,,,,
4,5,60.0,RL,84.0,14260.0,Pave,
5,6,,,,,,
6,7,20.0,RL,75.0,10084.0,Pave,
7,8,,,,,,
8,9,50.0,RM,51.0,6120.0,Pave,
9,10,,,,,,


In [63]:
df11 = df1.loc[:,'Id':'MSZoning']
df12 = df1.loc[:,['Id','LotArea']]

In [64]:
pd.merge(df11,df12,how = 'inner', on ='Id',indicator=True, validate="one_to_one")#.dtypes ## Outer, inner, left, right

Unnamed: 0,Id,MSSubClass,MSZoning,LotArea,_merge
0,1,60.0,RL,8450.0,both
1,2,,,,both
2,3,60.0,RL,11250.0,both
3,4,,,,both
4,5,60.0,RL,14260.0,both
5,6,,,,both


### Pivoting dataset

In [66]:
df = pd.DataFrame({'Name' : ['Abhishek','Abhishek','Abhishek', 'Ritika','Ritika', 'Ritika'],
                  'subject': ['English','Math','Science', 'English', 'Math', 'Science'],
                  'marks': ['8','6','9', '10', '9', '9']})
df.pivot(index = 'Name',columns='subject', values='marks')

subject,English,Math,Science
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Abhishek,8,6,9
Ritika,10,9,9


In [70]:
data['MSSubClass']

0        60
1       NaN
2        60
3       NaN
4        60
       ... 
1455    NaN
1456     20
1457    NaN
1458     20
1459    NaN
Name: MSSubClass, Length: 1460, dtype: object

In [71]:
data['MSSubClass'] == np.nan

0       False
1       False
2       False
3       False
4       False
        ...  
1455    False
1456    False
1457    False
1458    False
1459    False
Name: MSSubClass, Length: 1460, dtype: bool

### isna, notna, np.nan, fillna, df.fillna(method='pad', limit=1)

df.replace('.', np.nan)

In [76]:
data.replace('NaN', np.nan)



0      NaN
1      NaN
2      NaN
3      NaN
4      NaN
        ..
1455   NaN
1456   NaN
1457   NaN
1458   NaN
1459   NaN
Name: MSSubClass, Length: 1460, dtype: float64

In [77]:
data.describe() #statistical summary

Unnamed: 0,Id,MSSubClass,OverallQual,OverallCond,YearBuilt,YearRemodAdd,MasVnrArea,BsmtFinSF1,BsmtFinSF2,BsmtUnfSF,...,OpenPorchSF,EnclosedPorch,3SsnPorch,ScreenPorch,PoolArea,MiscVal,MoSold,YrSold,SalePrice,Id2
count,1460.0,0.0,1460.0,1460.0,1460.0,1460.0,1452.0,1460.0,1460.0,1460.0,...,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0,1460.0
mean,730.5,,6.099315,5.575342,1971.267808,1984.865753,103.685262,443.639726,46.549315,567.240411,...,46.660274,21.95411,3.409589,15.060959,2.758904,43.489041,6.321918,2007.815753,180921.19589,731.5
std,421.610009,,1.382997,1.112799,30.202904,20.645407,181.066207,456.098091,161.319273,441.866955,...,66.256028,61.119149,29.317331,55.757415,40.177307,496.123024,2.703626,1.328095,79442.502883,421.610009
min,1.0,,1.0,1.0,1872.0,1950.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2006.0,34900.0,2.0
25%,365.75,,5.0,5.0,1954.0,1967.0,0.0,0.0,0.0,223.0,...,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2007.0,129975.0,366.75
50%,730.5,,6.0,5.0,1973.0,1994.0,0.0,383.5,0.0,477.5,...,25.0,0.0,0.0,0.0,0.0,0.0,6.0,2008.0,163000.0,731.5
75%,1095.25,,7.0,6.0,2000.0,2004.0,166.0,712.25,0.0,808.0,...,68.0,0.0,0.0,0.0,0.0,0.0,8.0,2009.0,214000.0,1096.25
max,1460.0,,10.0,9.0,2010.0,2010.0,1600.0,5644.0,1474.0,2336.0,...,547.0,552.0,508.0,480.0,738.0,15500.0,12.0,2010.0,755000.0,1461.0


In [80]:
#data.info()
data.to_csv("dingdong.csv", index= False)

In [83]:
data['MSSubClass'].fillna(0, inplace = True)
data

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
0,1,0.0,RL,65,8450,Pave,,Reg,Lvl,AllPub,...,,,,0,2,2008,WD,Normal,208500,2
1,2,0.0,,,,,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3
2,3,0.0,RL,68,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4
3,4,0.0,,,,,,IR1,Lvl,AllPub,...,,,,0,2,2006,WD,Abnorml,140000,5
4,5,0.0,RL,84,14260,Pave,,IR1,Lvl,AllPub,...,,,,0,12,2008,WD,Normal,250000,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,0.0,,,,,,Reg,Lvl,AllPub,...,,,,0,8,2007,WD,Normal,175000,1457
1456,1457,0.0,RL,85,13175,Pave,,Reg,Lvl,AllPub,...,,MnPrv,,0,2,2010,WD,Normal,210000,1458
1457,1458,0.0,,,,,,Reg,Lvl,AllPub,...,,GdPrv,Shed,2500,5,2010,WD,Normal,266500,1459
1458,1459,0.0,RL,68,9717,Pave,,Reg,Lvl,AllPub,...,,,,0,4,2010,WD,Normal,142125,1460


In [88]:
data['new'] = 1
data.columns
data.column.values

array(['Id', 'MSSubClass', 'MSZoning', 'LotFrontage', 'LotArea', 'Street',
       'Alley', 'LotShape', 'LandContour', 'Utilities', 'LotConfig',
       'LandSlope', 'Neighborhood', 'Condition1', 'Condition2',
       'BldgType', 'HouseStyle', 'OverallQual', 'OverallCond',
       'YearBuilt', 'YearRemodAdd', 'RoofStyle', 'RoofMatl',
       'Exterior1st', 'Exterior2nd', 'MasVnrType', 'MasVnrArea',
       'ExterQual', 'ExterCond', 'Foundation', 'BsmtQual', 'BsmtCond',
       'BsmtExposure', 'BsmtFinType1', 'BsmtFinSF1', 'BsmtFinType2',
       'BsmtFinSF2', 'BsmtUnfSF', 'TotalBsmtSF', 'Heating', 'HeatingQC',
       'CentralAir', 'Electrical', '1stFlrSF', '2ndFlrSF', 'LowQualFinSF',
       'GrLivArea', 'BsmtFullBath', 'BsmtHalfBath', 'FullBath',
       'HalfBath', 'BedroomAbvGr', 'KitchenAbvGr', 'KitchenQual',
       'TotRmsAbvGrd', 'Functional', 'Fireplaces', 'FireplaceQu',
       'GarageType', 'GarageYrBlt', 'GarageFinish', 'GarageCars',
       'GarageArea', 'GarageQual', 'GarageCond', 'Pav

In [90]:
data.reset_index(drop = True, inplace = True)

In [93]:
data.drop(['new'],axis =1)

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice,Id2
0,1,0.0,RL,65,8450,Pave,,Reg,Lvl,AllPub,...,,,,0,2,2008,WD,Normal,208500,2
1,2,0.0,,,,,,Reg,Lvl,AllPub,...,,,,0,5,2007,WD,Normal,181500,3
2,3,0.0,RL,68,11250,Pave,,IR1,Lvl,AllPub,...,,,,0,9,2008,WD,Normal,223500,4
3,4,0.0,,,,,,IR1,Lvl,AllPub,...,,,,0,2,2006,WD,Abnorml,140000,5
4,5,0.0,RL,84,14260,Pave,,IR1,Lvl,AllPub,...,,,,0,12,2008,WD,Normal,250000,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1455,1456,0.0,,,,,,Reg,Lvl,AllPub,...,,,,0,8,2007,WD,Normal,175000,1457
1456,1457,0.0,RL,85,13175,Pave,,Reg,Lvl,AllPub,...,,MnPrv,,0,2,2010,WD,Normal,210000,1458
1457,1458,0.0,,,,,,Reg,Lvl,AllPub,...,,GdPrv,Shed,2500,5,2010,WD,Normal,266500,1459
1458,1459,0.0,RL,68,9717,Pave,,Reg,Lvl,AllPub,...,,,,0,4,2010,WD,Normal,142125,1460


In [94]:
pd.Series(range(1,100))

0      1
1      2
2      3
3      4
4      5
      ..
94    95
95    96
96    97
97    98
98    99
Length: 99, dtype: int64

In [105]:
from datetime import timedelta
d1 = pd.date_range('2020/1/1', periods =  10, freq ='7D') #D= regular days, B= business days
d1+timedelta(days =7)

DatetimeIndex(['2020-01-08',
               '2020-01-15',
               '2020-01-22',
               '2020-01-29',
               '2020-02-05',
               '2020-02-12',
               '2020-02-19',
               '2020-02-26',
               '2020-03-04',
               '2020-03-11'],
              dtype='datetime64[ns]', freq='W-WED')

### group by

In [117]:
df = pd.DataFrame({'Name' : ['Abhishek','Abhishek','Abhishek', 'Ritika','Ritika', 'Ritika'],
                  'subject': ['English','Math','Science', 'English', 'Math', 'Science'],
                  'marks': [8,6,9,10,9,9]})
f = {'marks':['sum'],}
df.groupby(['Name']).agg(f)

Unnamed: 0_level_0,marks
Unnamed: 0_level_1,sum
Name,Unnamed: 1_level_2
Abhishek,23
Ritika,28
