# In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from xgboost import XGBRegressor

# Data
# One tuple per observation year; the fields follow the order of
# `column_names`.
column_names = (
    'Year',
    'LIT_RATE',
    'SEX_RATIO',
    'PROP_WORK',
    'PROP_NONWORK',
    'POP',
    'NO_HOUSEHOLD',
)
yearly_rows = [
    (1991, 89.81, 1036, 31.4, 68.6, 29098518, 5194058),
    (
        2001, 90.8638078669355, 1058.45035631505, 32.29724634370368,
        67.70275, 31841374, 6726356,
    ),
    (
        2011, 93.9956715484187, 1084.30787203823, 34.7813021116138,
        65.2187, 33406061, 7853754,
    ),
]

# Transpose the rows into a column-name -> list-of-values mapping, which is
# the layout the DataFrame is built from.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*yearly_rows))
}
df = pd.DataFrame(data)

# Features and target
# X holds the four predictor columns; y is the PROP_WORK column that the
# models below are fitted against.
feature_columns = ['LIT_RATE', 'SEX_RATIO', 'POP', 'NO_HOUSEHOLD']
target_column = 'PROP_WORK'
X = df[feature_columns]
y = df[target_column]

# Linear Regression
# The model is fitted and then scored on the same rows, so the reported
# error is in-sample. NOTE(review): X has four columns but only three rows,
# so a near-zero error here says little about predictive quality.
lin_reg = LinearRegression().fit(X, y)
y_pred = lin_reg.predict(X)
lin_reg_mse = mean_squared_error(y, y_pred)

print("Linear Regression Coefficients:", lin_reg.coef_)
print("Predictions:", y_pred)
print("Mean Squared Error:", lin_reg_mse)

# Random Forest with 1 tree (mimics a decision tree)
# RandomForestRegressor bootstraps by default, which would train the single
# tree on a resample of the rows (some possibly never seen). Turning the
# bootstrap off makes the tree fit on every row, as a plain decision tree
# would. Predictions and MSE are computed on the training rows themselves.
rf = RandomForestRegressor(n_estimators=1, bootstrap=False, random_state=42)
rf.fit(X, y)
y_pred_rf = rf.predict(X)

print("Decision Tree (RF) Predictions:", y_pred_rf)
print("Mean Squared Error:", mean_squared_error(y, y_pred_rf))

# Random Forest
# Ten-tree forest with a fixed seed; fit() returns the estimator itself, so
# construction and fitting are chained. Predictions are for the same rows
# the forest was fitted on.
rf_full = RandomForestRegressor(n_estimators=10, random_state=42).fit(X, y)
y_pred_rf_full = rf_full.predict(X)

# XGBoost
# Gradient-boosted regressor with ten boosting rounds and a fixed seed,
# fitted and then evaluated on the same rows.
xgb_params = {'n_estimators': 10, 'random_state': 42}
xgb = XGBRegressor(**xgb_params)
xgb.fit(X, y)
y_pred_xgb = xgb.predict(X)

# Compare
# Print each ensemble's in-sample predictions followed by its mean squared
# error against the actual target values.
model_reports = (
    ("Random Forest Predictions:", "Random Forest MSE:", y_pred_rf_full),
    ("XGBoost Predictions:", "XGBoost MSE:", y_pred_xgb),
)
for predictions_label, mse_label, predictions in model_reports:
    print(predictions_label, predictions)
    print(mse_label, mean_squared_error(y, predictions))

# Plot
# Overlay the actual target with the Random Forest and XGBoost predictions,
# one line per series, on a single set of axes.
fig, ax = plt.subplots(figsize=(10, 6))
years = df['Year']
prediction_series = (
    (y, 'o', 'Actual'),
    (y_pred_rf_full, 'x', 'Random Forest'),
    (y_pred_xgb, 's', 'XGBoost'),
)
for values, marker, label in prediction_series:
    ax.plot(years, values, marker=marker, label=label)
ax.set_title('Work Participation Prediction')
ax.set_xlabel('Year')
ax.set_ylabel('PROP_WORK (%)')
ax.legend()
ax.grid(True)
plt.show()

# 2. Calculate Changes Over Time
# Calculate differences
# Index by Year so that only the measured columns are differenced; the first
# row has no predecessor and therefore comes out as NaN.
by_year = df.set_index('Year')
row_to_row_change = by_year.diff()
df_diff = row_to_row_change.reset_index()
print("Differences between years:")
print(df_diff)

# 3. Calculate Percentage Growth Rates
# Calculate percentage changes
# pct_change() yields fractional change relative to the previous row;
# scaling by 100 expresses it in percent. Year stays in the index here and
# is only moved back to a column for printing.
fractional_change = df.set_index('Year').pct_change()
df_pct_change = fractional_change * 100
print("Percentage changes (%):")
print(df_pct_change.reset_index())

# 4. Visualize Trends
# Plot Literacy Rate over time
# Single-series line chart of LIT_RATE against Year.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(df['Year'], df['LIT_RATE'], marker='o', label='Literacy Rate')
ax.set_title('Literacy Rate Over Time')
ax.set_xlabel('Year')
ax.set_ylabel('Literacy Rate (%)')
ax.grid(True)
ax.legend()
plt.show()

# Plot multiple variables
# Population and household counts share one y-axis ("Count"), one line each.
fig, ax = plt.subplots(figsize=(10, 6))
count_series = (
    ('POP', 'Population'),
    ('NO_HOUSEHOLD', 'No. of Households'),
)
for column, label in count_series:
    ax.plot(df['Year'], df[column], marker='o', label=label)
ax.set_title('Population and Households Over Time')
ax.set_xlabel('Year')
ax.set_ylabel('Count')
ax.legend()
ax.grid(True)
plt.show()

# 5. Advanced Time Series Analysis
# Example: Exponential Smoothing for LIT_RATE
# Additive-trend exponential smoothing on the literacy series. The series
# carries the DataFrame's default row index rather than the years, so the
# forecast is simply "one row past the last observation"; with rows at
# 1991, 2001 and 2011 that step corresponds to 2021.
literacy_rate = df['LIT_RATE']
model = ExponentialSmoothing(literacy_rate, trend='add')
fit = model.fit()
forecast = fit.forecast(steps=1)  # Predict for 2021
next_literacy_rate = forecast.iloc[0]
print("Forecasted Literacy Rate for 2021:", next_literacy_rate)

# Plot fitted values
# Compare the observed literacy rate with the smoothing model's in-sample
# fitted values, both drawn against Year.
fig, ax = plt.subplots(figsize=(10, 6))
literacy_series = (
    (df['LIT_RATE'], 'o', 'Actual'),
    (fit.fittedvalues, 'x', 'Fitted'),
)
for values, marker, label in literacy_series:
    ax.plot(df['Year'], values, marker=marker, label=label)
ax.set_title('Literacy Rate with Exponential Smoothing')
ax.set_xlabel('Year')
ax.set_ylabel('Literacy Rate (%)')
ax.legend()
ax.grid(True)
plt.show()