## `Importing and displaying datasets`

### <i>All imports goes here!</i>

In [39]:
import pandas as pd
from IPython.display import display
from sklearn.preprocessing import LabelEncoder


### <i>Reading the data</i>

In [40]:
result_2008 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2008_result.csv')
result_2013 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2013_result.csv')
result_2017 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2017_result.csv')
result_2022 = pd.read_csv('Data/Cleaned_Data/cleaned_&_final_2022_result.csv')

display(result_2008.head())
display(result_2013.head())
display(result_2017.head())
display(result_2022.head())

Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Achham,Achham 1,uml,2119.0358,40,54.53
1,1,Achham,Achham 2,maoist,1708.727,32,52.82
2,2,Arghakhanchi,Arghakhanchi 1,maoist,1966.7836,46,55.34
3,3,Arghakhanchi,Arghakhanchi 2,nepalicongress,1755.126,46,52.05
4,4,Baglung,Baglung 1,maoist,1828.5047,48,60.01


Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Taplejung,Taplejung-1,Nepal Communist Party (UML),7434.0,37.0,73.12
1,1,Taplejung,Taplejung-2,Nepal Communist Party (UML),7034.0,48.0,72.6
2,2,Panchthar,Panchthar-1,Nepal Communist Party (UML),13082.0,26.0,72.18
3,3,Panchthar,Panchthar-2,Nepali Congress,11839.0,41.0,74.58
4,4,Illam,Illam-1,Nepal Communist Party (UML),17342.0,55.0,78.58


Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Taplejung,Taplejung-1,Nepal Communist Party (UML),15417,46,59.060916
1,1,Taplejung,Taplejung-1,Nepali Congress,10974,51,62.215124
2,2,Taplejung,Taplejung-1,Federal Democratic National Front,861,57,54.605809
3,3,Taplejung,Taplejung-1,Naya Shakti Party Nepal,267,41,52.116411
4,4,Taplejung,Taplejung-1,Federal Socialist Forum Nepal,158,45,67.28869


Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%
0,0,Taplejung,Taplejung-1,Nepal Communist Party (UML),21943,56,49.859126
1,1,Taplejung,Taplejung-1,Nepal Communist Party (Maoist Center) (Unified...,21735,46,53.744678
2,2,Taplejung,Taplejung-1,Federal Democratic National Front,941,56,54.895159
3,3,Taplejung,Taplejung-1,"Janata Samajwadi Party, Nepal",628,49,51.052482
4,4,Taplejung,Taplejung-1,Rastriya Prajatantra Party,406,65,54.328004


## `Data Cleaning (if necessary)`

### <i>Checking if there are any missing values</i>

In [41]:
missing_values_2008 = result_2008.isnull().sum()
print("Missing Values for 2008 result:\n", missing_values_2008)

missing_values_2013 = result_2013.isnull().sum()
print("Missing Values for 2013 result:\n", missing_values_2013)

missing_values_2017 = result_2017.isnull().sum()
print("Missing Values for 2017 result:\n", missing_values_2017)

missing_values_2022 = result_2022.isnull().sum()
print("Missing Values for 2022 result:\n", missing_values_2022)


Missing Values for 2008 result:
 Unnamed: 0    0
District      0
Const         0
WParty        0
WVotes        0
Age           0
TurnOut%      0
dtype: int64
Missing Values for 2013 result:
 Unnamed: 0     0
District       0
Const          0
WParty        47
WVotes        47
Age            0
TurnOut%       0
dtype: int64
Missing Values for 2017 result:
 Unnamed: 0    0
District      0
Const         0
WParty        0
WVotes        0
Age           0
TurnOut%      0
dtype: int64
Missing Values for 2022 result:
 Unnamed: 0    0
District      0
Const         0
WParty        0
WVotes        0
Age           0
TurnOut%      0
dtype: int64


### <i>Handling missing values</i>

In [42]:
# Using info to check the data type of missing value columns i.e., WParty and WVotes 
result_2013.info()
# Data type found to be oject and float64 for WParty and WVotes respectively

# For WParty which is of object datatype
for column in result_2013['WParty']:
    
    result_2013['WParty'].fillna(result_2013['WParty'].mode()[0], inplace=True)
    
# For WVoteswhich is of float datatype
for column in result_2013['WVotes']:
       
    result_2013['WVotes'].fillna(result_2013['WVotes'].median(), inplace=True)

# Checking if there are still missing values present
missing_values_2013 = result_2013.isnull().sum()
print("Missing Values for 2013 result:\n", missing_values_2013)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240 entries, 0 to 239
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  240 non-null    int64  
 1   District    240 non-null    object 
 2   Const       240 non-null    object 
 3   WParty      193 non-null    object 
 4   WVotes      193 non-null    float64
 5   Age         240 non-null    float64
 6   TurnOut%    240 non-null    float64
dtypes: float64(3), int64(1), object(3)
memory usage: 13.2+ KB


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  result_2013['WParty'].fillna(result_2013['WParty'].mode()[0], inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  result_2013['WVotes'].fillna(result_2013['WVotes'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work becau

Missing Values for 2013 result:
 Unnamed: 0    0
District      0
Const         0
WParty        0
WVotes        0
Age           0
TurnOut%      0
dtype: int64


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  result_2013['WVotes'].fillna(result_2013['WVotes'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  result_2013['WVotes'].fillna(result_2013['WVotes'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work becaus

### <i>Checking if there are any duplicate values</i>

In [43]:
duplicates_08 = result_2008.duplicated().sum()
print(f"Number of duplicate rows in 2008 result: {duplicates_08}")

duplicates_13 = result_2013.duplicated().sum()
print(f"Number of duplicate rows in 2013 result: {duplicates_13}")

duplicates_17 = result_2017.duplicated().sum()
print(f"Number of duplicate rows in 2017 result: {duplicates_17}")

duplicates_22 = result_2022.duplicated().sum()
print(f"Number of duplicate rows in 2022 result: {duplicates_22}")

Number of duplicate rows in 2008 result: 0
Number of duplicate rows in 2013 result: 0
Number of duplicate rows in 2017 result: 0
Number of duplicate rows in 2022 result: 0


### <i>Checking if there are any inconsistencies in data</i>

In [None]:
# Standardizing case and remove leading/trailing spaces
result_2008['District'] = result_2008['District'].str.lower().str.strip()


## `Feature engineering`

### <i>Encoding categorical Variables</i>

#### <i><u>Performing target variable encoding for WParty</u></i>

In [None]:
label_encoder = LabelEncoder()
result_2008['WParty_encoded'] = label_encoder.fit_transform(result_2008['WParty'])

result_2013['WParty_encoded'] = label_encoder.fit_transform(result_2013['WParty'])

result_2017['WParty_encoded'] = label_encoder.fit_transform(result_2017['WParty'])

result_2022['WParty_encoded'] = label_encoder.fit_transform(result_2022['WParty'])

result_2022.head()

Unnamed: 0.1,Unnamed: 0,District,Const,WParty,WVotes,Age,TurnOut%,WParty_encoded
0,0,Taplejung,Taplejung-1,Nepal Communist Party (UML),21943,56,49.859126,28
1,1,Taplejung,Taplejung-1,Nepal Communist Party (Maoist Center) (Unified...,21735,46,53.744678,26
2,2,Taplejung,Taplejung-1,Federal Democratic National Front,941,56,54.895159,5
3,3,Taplejung,Taplejung-1,"Janata Samajwadi Party, Nepal",628,49,51.052482,15
4,4,Taplejung,Taplejung-1,Rastriya Prajatantra Party,406,65,54.328004,51


#### <i><u>Performing one-hot-encoding for District and Const</u></i>

In [None]:
result_2008 = pd.get_dummies(result_2008, columns=['District'], drop_first=True)
result_2008 = pd.get_dummies(result_2008, columns=['Const'], drop_first=True)

result_2013 = pd.get_dummies(result_2013, columns=['District'], drop_first=True)
result_2013 = pd.get_dummies(result_2013, columns=['Const'], drop_first=True)

result_2017 = pd.get_dummies(result_2017, columns=['District'], drop_first=True)
result_2017 = pd.get_dummies(result_2017, columns=['Const'], drop_first=True)

result_2022 = pd.get_dummies(result_2022, columns=['District'], drop_first=True)
result_2022 = pd.get_dummies(result_2022, columns=['Const'], drop_first=True)

In [47]:
result_2008.head()
result_2013.head()
result_2017.head()
result_2022.head()

Unnamed: 0.1,Unnamed: 0,WParty,WVotes,Age,TurnOut%,WParty_encoded,District_Baitadi,District_Bajhang,District_Bara,District_Dailekh,...,Const_सप्तरी-4,Const_सल्यान-1,Const_सिन्धुपाल्चोक-1,Const_सिन्धुपाल्चोक-2,Const_सिन्धुली-1,Const_सिन्धुली-2,Const_सुर्खेत-1,Const_सुर्खेत-2,Const_सोलुखुम्बु-1,Const_हुम्ला-1
0,0,Nepal Communist Party (UML),21943,56,49.859126,28,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
1,1,Nepal Communist Party (Maoist Center) (Unified...,21735,46,53.744678,26,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
2,2,Federal Democratic National Front,941,56,54.895159,5,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
3,3,"Janata Samajwadi Party, Nepal",628,49,51.052482,15,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
4,4,Rastriya Prajatantra Party,406,65,54.328004,51,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False
