<a href="https://colab.research.google.com/github/gmehra123/data_science_projs/blob/main/Exception_Handling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Project Introduction
#### Objective
A file with multiple errors is read in and passed through a `test_data_check` function. The file has the following errors:
  * **Missing values**
  * **Negative values** in certain columns like population which cannot be negative
  * **Blank Strings** in certain columns

* The objective of exception handling is to raise the user-defined exception for each of these errors and stop execution until the error is fixed

* This prevents cascading errors and does not require *manual visual inspection* to handle errors.


In [1]:
# Importing libraries
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
import seaborn as sns
import numpy as np

### Exception

In [18]:
def calc_FV(PV,r,nper):
  """Return the future value of PV compounded at rate r over nper periods.

  Computes (1 + r) ** nper * PV with no validation of the inputs, so
  negative rates or negative period counts are accepted as-is.
  """
  growth_factor = (1+r)**nper
  return growth_factor * PV

In [19]:
# Running just a basic FV function
calc_FV(100,0.04,10)

148.02442849183444

In [20]:
# No exception handling, so a rate of -100% is accepted. Interest rates cannot be negative 100%. Clearly you need to build some exception handling
calc_FV(100,-1.0,10)

0.0

In [21]:
# Negative time periods
calc_FV(100,0.2,-10)

16.15055828898458

In [24]:
def calc_FV(PV,r,nper):
  """Return the future value of PV, rejecting out-of-range inputs.

  A negative PV, a rate outside [0, 1] or a negative nper raises a
  ValueError internally; it is caught here, a message is printed and the
  function returns None. Otherwise (1 + r) ** nper * PV is returned.
  Only ValueError is handled, so a TypeError from comparing a non-numeric
  argument propagates to the caller.
  """
  try:
    # Evaluated left to right, in the same order as the individual checks.
    if PV<0 or r<0 or r>1 or nper<0:
      raise ValueError
  except ValueError:
    print("Out of bounds. Please enter positive numbers and rate should be between 0 and 1")
    return None
  return (1+r)**nper * PV

In [25]:
# Catches negative interest rate exception
calc_FV(100,-0.3,10)

Out of bounds. Please enter positive numbers and rate should be between 0 and 1


In [26]:
# Catches negative PV exception
calc_FV(-100,0.1,10)

Out of bounds. Please enter positive numbers and rate should be between 0 and 1


In [27]:
calc_FV(100,0.04,-4)

Out of bounds. Please enter positive numbers and rate should be between 0 and 1


In [28]:
# Runs correctly
calc_FV(100,0.2,20)

3833.7599924474725

In [29]:
calc_FV("one",0.2,10)

TypeError: ignored

In [30]:
def calc_FV(PV,r,nper):
  """Return the future value of PV, handling out-of-range and non-numeric inputs.

  Parameters
  ----------
  PV : number
      Present value; must not be negative.
  r : number
      Rate per period; must lie between 0 and 1 inclusive.
  nper : number
      Number of periods; must not be negative.

  Returns
  -------
  The value (1 + r) ** nper * PV, or None when the inputs are rejected.
  A message describing the rejection is printed in that case.
  """
  try:
    # nper is validated too, consistent with the earlier version of this
    # function; negative periods must not silently produce a result.
    if PV<0 or r<0 or r>1 or nper<0:
      raise ValueError
    # Computed inside the try so a TypeError from mixing incompatible
    # numeric types is reported the same way as one from the comparisons.
    FV = (1+r)**nper * PV
  except ValueError:
    print("Out of bounds. Please enter positive numbers")
    return None
  except TypeError:
    print("Please enter numbers")
    return None
  return FV

In [31]:
calc_FV("one",0.2,10)

Please enter numbers


### Exception handling on Dataframes

In [32]:
# Reading in the file check for read error
try:
  data = pd.read_csv('drive/MyDrive/Colab Notebooks/wrong.csv')
except IOError:
  print("File not found check Path!!!")
  # Re-raise so execution stops here; otherwise `data` stays undefined and
  # the following cells fail with an unrelated NameError.
  raise

In [33]:
# Not possible to spot errors by visual inspection
data.head(10)

Unnamed: 0,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
5,52.0,919.0,213.0,413.0,193.0,4.0368,269700.0,NEAR BAY
6,52.0,2535.0,489.0,1094.0,514.0,3.6591,299200.0,NEAR BAY
7,52.0,3104.0,687.0,1157.0,647.0,3.12,-100000.0,NEAR BAY
8,42.0,2555.0,665.0,1206.0,595.0,2.0804,226700.0,NEAR BAY
9,52.0,3549.0,707.0,1551.0,714.0,3.6912,261100.0,NEAR BAY


In [34]:
data.columns

Index(['housing_median_age', 'total_rooms', 'total_bedrooms', 'population',
       'households', 'median_income', 'median_house_value', 'ocean_proximity'],
      dtype='object')

In [62]:
# Creating user defined exception classes
class MissingValue(Exception):
  """Signals that a DataFrame being validated contains missing (NaN) values."""

class BlankStrings(Exception):
  """Signals that a DataFrame being validated contains blank string cells."""

class SpecialCharacters(Exception):
  """Marker exception for special characters in the data.

  NOTE(review): not raised anywhere in the visible notebook; presumably
  reserved for a special-character check - confirm or remove.
  """

class NegativeValues(Exception):
  """Signals that a DataFrame being validated contains negative numeric values."""



In [67]:
data.isna().any().sum()

2

In [68]:
def test_data_check(data1):
  """Validate a DataFrame and return it only when every check passes.

  The checks run in order and stop at the first failure:
    1. missing values anywhere in the frame  -> MissingValue
    2. cells equal to a single space ' '     -> BlankStrings
    3. negative values in numeric columns    -> NegativeValues

  Each exception is caught inside this function and reported with a
  printed message, so the caller never sees it.

  Parameters
  ----------
  data1 : pandas.DataFrame
      The frame to validate. It is not modified.

  Returns
  -------
  pandas.DataFrame
      `data1` itself (not a copy) when all checks pass, otherwise an
      empty DataFrame.
  """
  fnl_data = pd.DataFrame()
  try:
    # check missing values in table
    if data1.isna().any().any():
      raise MissingValue

    # check blank strings (vectorised; DataFrame.applymap is deprecated)
    if data1.eq(' ').any().any():
      raise BlankStrings

    # check negative values on the frame that was passed in, not on the
    # notebook-level `data`, and across all numeric dtypes rather than
    # floats only so negative integer columns are caught as well
    if data1.select_dtypes('number').lt(0).any().any():
      raise NegativeValues

    # all tests passed
    print("Checks Passed")
    fnl_data = data1

  except MissingValue:
    print("Missing values found. Check File!!!")

  except BlankStrings:
    print("Blank String values Found !!!")

  except NegativeValues:
    print("Negative numeric values found !!!")

  return fnl_data


In [69]:
# First Exception raised
test_data_check(data)

Missing values found. Check File!!!


In [70]:
# Find the columns with missing data
data.isna().sum()

housing_median_age      0
total_rooms           100
total_bedrooms        207
population              0
households              0
median_income           0
median_house_value      0
ocean_proximity         0
dtype: int64

In [71]:
# Resolving Missing value error: fill each affected column with its own mean
total_rooms_mean = data['total_rooms'].mean()
total_bedrooms_mean = data['total_bedrooms'].mean()
mean_by_column = {'total_rooms': total_rooms_mean, 'total_bedrooms': total_bedrooms_mean}
data.fillna(value=mean_by_column, inplace=True)

In [72]:
# Second exception raised
test_data_check(data)

Blank String values Found !!!


In [73]:
# Resolving Blank String Error: single-space cells become an explicit marker
data.replace(' ', 'NOT_KNOWN', inplace=True)

In [74]:
# Third exception raised
test_data_check(data)

Negative numeric values found !!!


In [75]:
# Looks like 2 Median house values are negative
(data.select_dtypes('float')<0).sum()>0

housing_median_age    False
total_rooms           False
total_bedrooms        False
population            False
households            False
median_income         False
median_house_value     True
dtype: bool

In [76]:
# resolving the negative value error: negative house values are set to 0
negative_value_rows = data['median_house_value'] < 0
data.loc[negative_value_rows, 'median_house_value'] = 0

In [77]:
# All checks passed and data ready to use.
fnl_data=test_data_check(data)

Checks Passed


In [78]:
fnl_data

Unnamed: 0,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
0,41.0,880.0,129.0,322.0,126.0,8.3252,452600.0,NEAR BAY
1,21.0,7099.0,1106.0,2401.0,1138.0,8.3014,358500.0,NEAR BAY
2,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
3,52.0,1274.0,235.0,558.0,219.0,5.6431,341300.0,NEAR BAY
4,52.0,1627.0,280.0,565.0,259.0,3.8462,342200.0,NEAR BAY
...,...,...,...,...,...,...,...,...
20635,25.0,1665.0,374.0,845.0,330.0,1.5603,78100.0,INLAND
20636,18.0,697.0,150.0,356.0,114.0,2.5568,77100.0,INLAND
20637,17.0,2254.0,485.0,1007.0,433.0,1.7000,92300.0,INLAND
20638,18.0,1860.0,409.0,741.0,349.0,1.8672,84700.0,INLAND
