In [2]:
import pickle
import pandas as pd
from geopy import distance
pd.set_option('display.max_columns', None)
# useful functions and classes

# This class stores the latitude and longitude of a sample, and indicates 
# if this location has the desired variable we are estimating
class Location:
    def __init__(self,latitude,longitude,hasv,ID,value):
        self.ID = ID
        self.latitude = latitude
        self.longitude = longitude
        self.hasv = hasv
        self.value = value
        
    def __str__(self):
        return str(self.ID)

# Calculates the distance between 2 samples in km
def getdist(S1,S2):
    # radius of earth in km
    coords_1 = (S1.latitude, S1.longitude)
    coords_2 = (S2.latitude, S2.longitude)
    dist = distance.distance(coords_1, coords_2).km
    return dist

# filters out data if a point is missing in one of the colunns
def filterblanks(columns,data,blank):
    # if blank is true, rows with blanks in these columns
    # if blank is false, remove rows with non blanks or non zeros in these columns
    for c in columns:
        if blank:
            data = data[data[c].notnull()]
        else:
            data = data[data[c].isnull()]
    return data

# PRE: all locations in the dataframe are
# unique
def DistanceMatrix(dataframe,variable):
    # the list of location objects
    locations = []
    # the list of indexes where the the row is located in the dataframe
    #indexes = []
    for index,row in dataframe.iterrows():
        # make a location object on this row
        if pd.isnull(row[variable]):
            hasv = False
        else:
            hasv = True
        locations.append(Location(row["LATITUDE"],row["LONGITUDE"],hasv,row["LOCATCD"],row[variable]))
        #indexes.append(index)
        
    matrix = pd.DataFrame(0,index=locations,columns=locations)
    for ci,column in enumerate(locations):
        for ri,row in enumerate(locations):
            if ri>ci:
                # compute distance between column and row
                dist = getdist(row,column)
            elif ci>ri:
                dist = matrix.iloc[ci,ri]
            # put this distance in the dataframe
            else:
                continue
            matrix.iloc[ri,ci] = dist
    return matrix

def changeVar(DM,data,variable):
    locations = DM.index
    # loop through each location
    for i,loc in enumerate(locations):
        ID = loc.ID
        row = data[data["LOCATCD"]==ID]
        if pd.isnull(row[variable]):
            locations[i].hasv = False
            locations[i].value = None
        else:
            locations[i].hasv = True
            locations[i].value = row[variable]
            
    DM.index = locations
    DM.columns = locations
        
def getclosest(numclosest,distancematrix,location):
    # Make a set of the closest locations that contain variable
    closest = {}
    column = distancematrix.loc[:,location].copy()
    #print(type(distancematrix.index[0]))
    # Filter the locations that dont have the desired variable
    doesnthavev = []
    for i in range(len(column)):
        if not column.index[i].hasv:
            doesnthavev.append(column.index[i])
    column.drop(doesnthavev,inplace = True)
    #print(type(column))
    column.sort_values(inplace = True)
    # The current location wouldnt be in column because
    # it doesnt have the variable
    
    return column.iloc[0:numclosest]

# Key: Location Code
# Value: List of tuples (locatcd,distance,value)
def makeDict(data,variable,numclosest=2):
    D = DistanceMatrix(data,variable)
    # Loop through each location without a value for variable
    closestDict = {}
    for loc in D.columns:
        if not loc.hasv:
            # Get the closest locations to loc
            closest = getclosest(numclosest,D,loc)
            # The list of tuples that contain location id, the distance, and the value for variable
            tuples = []
            for i,dist in enumerate(closest):
                ID = closest.index[i].ID
                val = closest.index[i].value
                tuples.append((ID,dist,val))
            closestDict[loc.ID] = tuples
    return closestDict

def predict(tuples,numclosest = 2):
    loc2 = tuples[0]
    loc3 = tuples[1]
    d12 = loc2[1]
    val2 = loc2[2]
    d13 = loc3[1]
    val3 = loc3[2]
    
    c2 = d12/(d12+d13)
    c3 = d13/(d12+d13)
    
    predicted = c2*val2+c3*val3
    
    return predicted

# NEEDS WORK
def addpredictions(df,variables,numclosest):
    # make prediction and insert for each variable
    first = True
    for var in variables:
        if first:
            DM = DistanceMatrix(df,var)
        else:
            changeVar(DM,df,var)
            
        for loc in DM.columns:
            if not loc.hasv:
                # Get the closest locations to loc
                closest = getclosest(numclosest,DM,loc)
                # The list of tuples that contain location id, the distance, and the value for variable
                tuples = []
                for i,dist in enumerate(closest):
                    ID = closest.index[i].ID
                    val = closest.index[i].value
                    tuples.append((ID,dist,val))
                closestDict[loc.ID] = tuples
                
def run():
    DictTN = makeDict(data,"TN")
    DictTP = makeDict(data,"TP")
    #put in predicted TN
    data["PredictedTN"] = 0
    for index,row in data.iterrows():
        if pd.isnull(row["TN"]):
            data.loc[index,"PredictedTN"] = predict(DictTN[row["LOCATCD"]])
        else:
            data.loc[index,"PredictedTN"] = row["TN"]

    data["PredictedTP"] = 0
    for index,row in data.iterrows():
        if pd.isnull(row["TP"]):
            data.loc[index,"PredictedTP"] = predict(DictTP[row["LOCATCD"]])
        else:
            data.loc[index,"PredictedTP"] = row["TP"]   
        
    print(data.shape)
    print("Filtering out points with blank entries in at least one of the columns")
    cols = ['PredictedTN','PredictedTP','TEMP','DO','TURB','COND','VEL','SS','WDP','CHLcal','SECCHI']
    #for col in cols:
    #    print(col,data[col].isna().sum())
    qualdata_prediction = filterblanks(cols,data,True)
    print(qualdata_prediction.shape)

In [9]:
data = pickle.load(open( "water_data_coords.p", "rb" ))
#path = r"C:\Users\cashe\OneDrive\Desktop\Data Science\Mississippi River analysis\Krouth_water_and_veg_data_w_latlong\ltrm_water_data_lat_long.csv"
#doug_data = pd.read_csv(path,low_memory=False)
#veg_path = r"C:\Users\cashe\OneDrive\Desktop\Data Science\Mississippi River analysis\ltrm_water_data.csv"
#veg_data = pd.read_csv(veg_path,low_memory = False)
print(data.shape)


(106052, 133)


In [11]:
print(data["TP"].isna().sum())

75081


In [10]:
data.drop(data.columns.difference(['SHEETBAR','TN','TP','TPQF','TNQF','SS','SSQF',
                                         'TURB','TURBQF','WDP',
                                         'TEMP','TEMPQF','DO','DOQF','COND',
                                         'CONDQF','VEL','VELQF','FLDEAST',
                                         'FLDNORTH','PROJCD','FLDNUM','DATE',
                                  'LOCATCD','STRATUM','CHLcal','SECCHI','SECCHIQF','LATITUDE','LONGITUDE']), 1, inplace=True)
print("After filtering columns: ",data.shape)
data = data[(data.PROJCD == "M-")]
print("After filtering sampling design: ",data.shape)
data = data[(data.FLDNUM == 3)]
print("After filtering Pool 13: ",data.shape)
print("Now adding a year column")
data["YEAR"] = pd.DatetimeIndex(data["DATE"]).year
print(data.shape)
print("Adding a timecode column")
data["TIME CODE"] = data["LOCATCD"].astype(str).apply(lambda x: x[3])
print(data.shape)
print("Filtering by backwater lakes")
data = data[data.STRATUM == 3]
print(data.shape)
#print("Filtering by summer")
#data = data[data["TIME CODE"] == '2']
#print(data.shape)
print("Dropping data with SSQF=8 or 64")
qualdata = data[(data["SSQF"]!=8)&(data["SSQF"]!=64)]
print(qualdata.shape)
print("Dropping all blank columns")
qualdata.drop(['PROJCD','FLDEAST','FLDNORTH','TPQF','TNQF','SSQF','TURBQF','TEMPQF','DOQF',
                                        'CONDQF','VELQF','SECCHIQF'], 1, inplace=True)
print(qualdata.shape)
print("Filtering out points with blank entries in columns other than TP and TN")
f_cols = ['SS','CHLcal']
s_cols = ['VEL','TEMP']
all_cols = ['TEMP','DO','TURB','COND','VEL','SS','WDP','CHLcal','SECCHI']
qualdata_noprediction = filterblanks(all_cols,qualdata,True)
print(qualdata_noprediction.shape)

After filtering columns:  (106052, 30)
After filtering sampling design:  (106052, 30)
After filtering Pool 13:  (17991, 30)
Now adding a year column
(17991, 31)
Adding a timecode column
(17991, 32)
Filtering by backwater lakes
(8097, 32)
Dropping data with SSQF=8 or 64
(8093, 32)
Dropping all blank columns
(8093, 20)
Filtering out points with blank entries in columns other than TP and TN
(5517, 20)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


In [8]:
qualdata_noprediction.columns

Index(['FLDNUM', 'DATE', 'LOCATCD', 'WDP', 'SECCHI', 'STRATUM', 'TEMP', 'DO',
       'TURB', 'COND', 'VEL', 'TP', 'TN', 'SS', 'CHLcal', 'LATITUDE',
       'LONGITUDE', 'YEAR', 'TIME CODE'],
      dtype='object')

In [11]:
print("Building a new dataframe with predicted TP and TN values")
#s = qualdata_noprediction["LOCATCD"].duplicated(keep=False)
# get the years and timecodes for this dataset
#predictions can only be made if the point is in the same year and time code
years = qualdata_noprediction["YEAR"].unique()
timecodes = qualdata_noprediction["TIME CODE"].unique()
qualdata_prediction = pd.DataFrame()
for year in years:
    for timecode in timecodes:
        print("Appending predicted data for ",year," timecode ",timecode)
        # curset is the current set of rows we are predicting for
        curset = qualdata_noprediction[qualdata_noprediction["YEAR"]==year]
        curset = curset[curset["TIME CODE"]==timecode]
        curset["PredictedTN"] = 0
        curset["PredictedTP"] = 0
        print(curset.shape)
        DictTN = makeDict(curset,"TN")
        DictTP = makeDict(curset,"TP")
        #check to see if there are valid locations
        # that can be used to predict
        bad = bool((curset["TN"].isnull().sum()>(curset.shape[0]-2))|(curset["TP"].isnull().sum()>(curset.shape[0]-2)))
        print(curset["TN"].isnull().sum(),curset["TP"].isnull().sum())
        if(bad):
            print("Less than 2 locations have the variables in this set, dropping rows without variable")
            curset = curset[(curset["TP"].notnull())&(curset["TN"].notnull())]
            curset["PredictedTN"] = curset["TN"]
            curset["PredictedTP"] = curset["TP"]
            print("Cur set is now ",curset.shape)
        else:
            #put in predicted TN
            for index,row in curset.iterrows():
                if pd.isnull(row["TN"]):
                    #print("Predicting ",row["LOCATCD"])
                    curset.loc[index,"PredictedTN"] = predict(DictTN[row["LOCATCD"]])
                else:
                    curset.loc[index,"PredictedTN"] = row["TN"]
            #put in predicted TP
            for index,row in curset.iterrows():
                if pd.isnull(row["TP"]):
                    curset.loc[index,"PredictedTP"] = predict(DictTP[row["LOCATCD"]])
                else:
                    curset.loc[index,"PredictedTP"] = row["TP"] 
                    
        qualdata_prediction = qualdata_prediction.append(curset,ignore_index=True)  
print("Final data set size is ",qualdata_prediction.shape)

Building a new dataframe with predicted TP and TN values
Appending predicted data for  1993  timecode  2
(41, 22)
16 16
Appending predicted data for  1993  timecode  3
(53, 22)
25 25
Appending predicted data for  1993  timecode  4
(0, 22)
0 0
Less than 2 locations have the variables in this set, dropping rows without variable
Cur set is now  (0, 22)
Appending predicted data for  1993  timecode  1
(0, 22)
0 0
Less than 2 locations have the variables in this set, dropping rows without variable
Cur set is now  (0, 22)
Appending predicted data for  1994  timecode  2
(58, 22)
27 27
Appending predicted data for  1994  timecode  3
(0, 22)
0 0
Less than 2 locations have the variables in this set, dropping rows without variable
Cur set is now  (0, 22)
Appending predicted data for  1994  timecode  4
(49, 22)
23 23
Appending predicted data for  1994  timecode  1
(58, 22)
26 26
Appending predicted data for  1995  timecode  2
(59, 22)
27 27
Appending predicted data for  1995  timecode  3
(59, 22)
2

In [16]:
qualdata_prediction.drop(qualdata_prediction.columns.difference(['TEMP','VEL','PredictedTN']), 1, inplace=True)

In [17]:
qualdata_prediction.isna().sum()

TEMP           0
VEL            0
PredictedTN    0
dtype: int64

In [12]:
qualdata_prediction.to_excel("Predicted_allyear_backwater_Barcode.xlsx")

In [238]:
qualdata_prediction.shape

(684, 21)

In [59]:
data["TN"].min(),data["TN"].max()

(0.185, 46.989)

In [199]:
qualdata_prediction = pd.DataFrame

In [195]:
cols = ['TN','TP','TEMP','DO','TURB','COND','VEL','SS','WDP','CHLcal','SECCHI']
pure = filterblanks(cols,qualdata_prediction,True)
pure.shape

(83, 21)

In [167]:
DictTN['9432121']

[('9432048', 0.1999431644333697, 1.192), ('9632130', 0.679881575347745, nan)]

In [134]:
qualdata_prediction = filterblanks(cols,qualdata_prediction,True)
print(qualdata_prediction.shape)

(170, 21)


In [20]:
print("Filtering out points with blank entries in at least one of the columns")
cols = ['PredictedTN','PredictedTP','TEMP','DO','TURB','COND','VEL','SS','WDP','CHLcal','SECCHI']
#for col in cols:
#    print(col,data[col].isna().sum())
qualdata_prediction = filterblanks(cols,data,True)
print(qualdata_prediction.shape)

Filtering out points with blank entries in at least one of the columns
(46, 33)


In [135]:
data[data["SECCHIQF"].isnull()&data["TEMPQF"].isnull()&data["DOQF"].isnull()&data["TURBQF"].isnull()&data["CONDQF"].isnull()
    &data["VELQF"].isnull()&data["TPQF"].isnull()&data["TNQF"].isnull()&(data["SSQF"]==0)].shape

(50, 31)

In [46]:
data.head(50)

Unnamed: 0,FLDNUM,DATE,PROJCD,LOCATCD,WDP,SECCHI,SECCHIQF,STRATUM,FLDEAST,FLDNORTH,TEMP,TEMPQF,DO,DOQF,TURB,TURBQF,COND,CONDQF,VEL,VELQF,TP,TPQF,TN,TNQF,SS,SSQF,CHLcal,LATITUDE,LONGITUDE,YEAR,TIME CODE,PredictedTN,PredictedTP
52045,3,07/25/2012,M-,1232072,1.32,54.0,,2,,,27.7,,6.9,,14.0,,352.0,,0.1,,,,,,22.2,0.0,23.94708,42.2222,-90.3894,2012,2,1.328857,0.227558
52046,3,07/25/2012,M-,1232053,5.5,62.0,,1,,,27.7,,6.7,,16.0,,349.0,,,X,,,,,19.3,0.0,19.70628,42.2207,-90.3828,2012,2,1.32925,0.228165
52047,3,07/25/2012,M-,1232054,5.1,74.0,,1,,,27.8,,6.8,,15.0,,349.0,,,X,,,,,21.8,0.0,28.61196,42.2036,-90.3423,2012,2,1.33517,0.237321
52048,3,07/25/2012,M-,1232001,5.6,72.0,,1,,,27.8,,6.8,,12.0,,348.0,,,X,0.164,0.0,1.254,0.0,20.7,0.0,23.94708,42.2035,-90.3399,2012,2,1.254,0.164
52049,3,07/25/2012,M-,1232074,1.1,56.0,,2,,,27.9,,7.0,,22.0,,351.0,,0.12,,,,,,26.1,0.0,27.057,42.2063,-90.341,2012,2,1.334782,0.23672
52050,3,07/25/2012,M-,1232074,1.1,56.0,,2,,,,X,,X,,X,,X,,X,,,,,,,,42.2063,-90.341,2012,2,1.334782,0.23672
52051,3,07/25/2012,M-,1232073,2.0,64.0,,2,,,28.0,,7.0,,14.0,,352.0,,0.05,,,,,,18.6,0.0,21.4026,42.2086,-90.3457,2012,2,1.333569,0.234846
52052,3,07/25/2012,M-,1232073,2.0,64.0,,2,,,27.9,,6.9,,,X,352.0,,,X,,,,,,,,42.2086,-90.3457,2012,2,1.333569,0.234846
52053,3,07/25/2012,M-,1232055,3.1,72.0,,1,,,27.8,,6.8,,13.0,,349.0,,,X,,,,,20.1,0.0,21.82668,42.2017,-90.3376,2012,2,1.336662,0.239628
52054,3,07/25/2012,M-,1232002,8.7,78.0,,1,,,27.8,,6.9,,13.0,,348.0,,,X,0.152,0.0,1.28,0.0,15.2,0.0,23.24028,42.1903,-90.3114,2012,2,1.28,0.152


In [100]:
data[(data["SECCHIQF"]==0)|(data["TEMPQF"]==0)|(data["DOQF"]==0)|(data["TURBQF"]==0)|(data["CONDQF"]==0)|
    (data["VELQF"]==0)|(data["TPQF"]==0)|(data["TNQF"]==0)].shape

(81, 31)

In [117]:
#put in predicted TN
data["PredictedTN"] = 0
for index,row in data.iterrows():
    if pd.isnull(row["TN"]):
        data.loc[index,"PredictedTN"] = predict(DictTN[row["LOCATCD"]])
    else:
        data.loc[index,"PredictedTN"] = row["TN"]

data["PredictedTP"] = 0
for index,row in data.iterrows():
    if pd.isnull(row["TP"]):
        data.loc[index,"PredictedTP"] = predict(DictTP[row["LOCATCD"]])
    else:
        data.loc[index,"PredictedTP"] = row["TP"]      

In [110]:
data.shape

(151, 33)

In [118]:
print("Filtering out points with blank entries in at least one of the columns")
cols = ['PredictedTN','PredictedTP','TEMP','DO','TURB','COND','VEL','SS','WDP','CHLcal','SECCHI']
#for col in cols:
#    print(col,data[col].isna().sum())
qualdata_prediction = filterblanks(cols,data,True)
print(qualdata_prediction.shape)

Filtering out points with blank entries in at least one of the columns
(120, 33)


In [51]:
data["PredictedTP"].describe()

count    151.000000
mean       0.265133
std        0.248787
min        0.015000
25%        0.167185
50%        0.185000
75%        0.209663
max        1.355000
Name: PredictedTP, dtype: float64

In [52]:
data["TP"].describe()

count    81.000000
mean      0.246494
std       0.220238
min       0.015000
25%       0.167000
50%       0.185000
75%       0.203000
max       1.355000
Name: TP, dtype: float64

In [53]:
data["PredictedTN"].describe()

count    151.000000
mean       2.195319
std        0.373832
min        0.899000
25%        2.013365
50%        2.282123
75%        2.449000
max        2.736000
Name: PredictedTN, dtype: float64

In [54]:
data["TN"].describe()

count    81.000000
mean      2.214444
std       0.411976
min       0.899000
25%       2.129000
50%       2.315000
75%       2.477000
max       2.736000
Name: TN, dtype: float64

In [57]:
data.isna().sum()

FLDNUM           0
DATE             0
PROJCD           0
LOCATCD          0
WDP              1
SECCHI           1
SECCHIQF       139
STRATUM          0
FLDEAST        151
FLDNORTH       151
TEMP             1
TEMPQF         150
DO               1
DOQF           150
TURB             1
TURBQF         150
COND             1
CONDQF         150
VEL             31
VELQF          120
TP              70
TPQF            70
TN              70
TNQF            70
SS               1
SSQF             1
CHLcal           1
LATITUDE         0
LONGITUDE        0
YEAR             0
TIME CODE        0
PredictedTN      0
PredictedTP      0
dtype: int64

In [62]:
pickle.dump(data, open( "summer_1997.p", "wb" ) )

In [13]:
matrix = DistanceMatrix(data,"TN")

In [14]:
array = matrix.to_numpy()

In [15]:
def transpose(mat, tr, N): 
    for i in range(N): 
        for j in range(N): 
            tr[i][j] = mat[j][i] 
   
# Returns true if mat[N][N] is symmetric, else false 
def isSymmetric(mat, N): 
      
    tr = [ [0 for j in range(len(mat[0])) ] for i in range(len(mat)) ] 
    transpose(mat, tr, N) 
    for i in range(N): 
        for j in range(N): 
            if (mat[i][j] != tr[i][j]): 
                return False
    return True
   
# Driver code 
if (isSymmetric(array, 151)): 
    print ("Yes")
else: 
    print ("No")

Yes


(151, 151)

In [7]:
closest = getclosest(3,matrix,matrix.columns[58])

<class '__main__.Location'>
0.0


9732061     0.000000
9732060     1.097430
9732062     1.612008
9732008     2.011911
9732025     3.130428
             ...    
9732050    20.539670
9732022    20.800828
9732064    21.923900
9732001    22.458735
9732021    25.755405
Name: 9732061, Length: 81, dtype: float64

0.0


9732060     1.097430
9732062     1.612008
9732008     2.011911
9732025     3.130428
9732037     3.286177
             ...    
9732050    20.539670
9732022    20.800828
9732064    21.923900
9732001    22.458735
9732021    25.755405
Name: 9732061, Length: 80, dtype: float64

In [38]:
for i,dist in enumerate(closest):
    print(closest.index[i].value,dist)

1.075 1.0974304557353052
1.7619999999999998 1.612008115880842
2.51 2.0119109371293327


In [39]:
closest

9732060    1.097430
9732062    1.612008
9732008    2.011911
Name: 9732061, dtype: float64

In [200]:
data = pd.DataFrame({'Brand' : ['Maruti', 'Hyundai', 'Tata', 
                                'Mahindra', 'Maruti', 'Hyundai', 
                                'Renault', 'Tata', 'Maruti'], 
                     'Year' : [2012, 2014, 2011, 2015, 2012,  
                               2016, 2014, 2018, 2019], 
                     'Kms Driven' : [50000, 30000, 60000,  
                                     25000, 10000, 46000,  
                                     31000, 15000, 12000], 
                     'City' : ['Gurgaon', 'Delhi', 'Mumbai',  
                               'Delhi', 'Mumbai', 'Delhi',  
                               'Mumbai','Chennai',  'Ghaziabad'], 
                     'Mileage' :  [28, 27, 25, 26, 28,  
                                   29, 24, 21, 24]})

In [210]:
data

Unnamed: 0,Brand,Year,Kms Driven,City,Mileage
0,Maruti,2012,50000,Gurgaon,28
1,Hyundai,2014,30000,Delhi,27
2,Tata,2011,60000,Mumbai,25
3,Mahindra,2015,25000,Delhi,26
4,Maruti,2012,10000,Mumbai,28
5,Hyundai,2016,46000,Delhi,29
6,Renault,2014,31000,Mumbai,24
7,Tata,2018,15000,Chennai,21
8,Maruti,2019,12000,Ghaziabad,24


In [217]:
data[data["Year"]==2012]

Unnamed: 0,Brand,Year,Kms Driven,City,Mileage
0,Maruti,2012,50000,Gurgaon,28
4,Maruti,2012,10000,Mumbai,28


In [8]:
df = pd.DataFrame({'Brand' : ['M', 'Hyun', 'ta', 
                                'Mahdra', 'Muti', 'Hyuai', 
                                'nault', 'ata', 'Muti'], 
                     'Year' : [2012, 204, 2011, 2015, 2012,  
                               2016, 2014, 2018, 2019], 
                     'Kms Driven' : [8000, 3000, 60000,  
                                     25000, 1000, 46000,  
                                     31000, 1500, 12000], 
                     'City' : ['Gurgaon', 'Dhi', 'Mumbai',  
                               'Dhi', 'Muai', 'Delhi',  
                               'Mbai','Cheai',  'Ghaziabad'], 
                     'Mileage' :  [28, 27, 25, 26, 28,  
                                   29, 24, 21, 24]})

In [102]:
s = data.loc[:,"Brand"]
s[s.index==4]

4    Maruti
Name: Brand, dtype: object

In [5]:
df = pd.DataFrame(0,
     index=['cobra', 'viper', 'sidewinder'],
     columns=['max_speed', 'shield'])

In [10]:
df.append(data,ignore_index=True)

Unnamed: 0,Brand,Year,Kms Driven,City,Mileage
0,M,2012,8000,Gurgaon,28
1,Hyun,204,3000,Dhi,27
2,ta,2011,60000,Mumbai,25
3,Mahdra,2015,25000,Dhi,26
4,Muti,2012,1000,Muai,28
5,Hyuai,2016,46000,Delhi,29
6,nault,2014,31000,Mbai,24
7,ata,2018,1500,Cheai,21
8,Muti,2019,12000,Ghaziabad,24
9,Maruti,2012,50000,Gurgaon,28


In [107]:
range(1,5)

range(1, 5)

In [24]:
d = {"Loc1":[('LOC1','dist','val'),('LOC2','dist','val')]}

In [25]:
nest = d["Loc1"]

In [151]:
df = pd.DataFrame({'BoolCol': [True, False, False, True, True]},
       index=[10,20,30,40,50])

In [159]:
df.index[5]

IndexError: index 5 is out of bounds for axis 0 with size 5