<a href="https://colab.research.google.com/github/ardiusebenezer07/Machine-Learning/blob/main/Linier_Regression_FinancialMarket_csv_Ardius_Ebenezer_1103210208.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Import required libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Read the financial market CSV (path is relative to the notebook's working directory)
df = pd.read_csv(filepath_or_buffer='FinancialMarket.csv')

In [4]:
# Display the first few rows and basic info about the dataset
print("Dataset Overview:")
print(df.head())
print("\nDataset Info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()

Dataset Overview:
         x  combined_data
0  0.00000       0.003073
1  0.01001       0.033492
2  0.02002      -0.027269
3  0.03003       0.176319
4  0.04004      -0.006027

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   x              1000 non-null   float64
 1   combined_data  1000 non-null   float64
dtypes: float64(2)
memory usage: 15.8 KB
None


In [13]:
# Function to prepare data for modeling
def prepare_data(df, target_column):
    """
    Split a DataFrame into a feature matrix and a target vector.

    Parameters:
    df : DataFrame containing the financial data
    target_column : Name of the column to predict

    Returns:
    X : Feature matrix (NumPy array) built from every column except target_column
    y : Target vector (NumPy array) holding the values of target_column

    Raises:
    KeyError : If target_column is not one of df's columns
    """
    # Fail early with a message that lists the valid names, so a mistyped
    # target is easy to diagnose.
    if target_column not in df.columns:
        raise KeyError(
            f"Target column {target_column!r} not found; "
            f"available columns: {list(df.columns)}"
        )

    # Separate features (X) and target variable (y) using the arguments that
    # were passed in, not a global frame or a hard-coded column name.
    X = df.drop(columns=[target_column]).values
    y = df[target_column].values

    return X, y

In [None]:
# Build the feature matrix and target vector first: the split below needs X and y
# to exist. 'combined_data' is the column to predict (the dataset's columns are
# 'x' and 'combined_data'; 'Dtype' is only a header in the df.info() printout).
X, y = prepare_data(df, 'combined_data')

# Split the data into training and testing sets (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [None]:
# Standardize the features: the scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Helper that scores a regression model and reports the results
def evaluate_model(y_true, y_pred, model_name):
    """
    Compute MSE, RMSE, MAE and R-squared for a set of predictions and print them.

    Parameters:
    y_true : Actual target values
    y_pred : Predicted target values
    model_name : Name of the model, used in the printed heading

    Returns:
    dict : The metrics keyed by 'mse', 'rmse', 'mae' and 'r2'
    """
    squared_error = mean_squared_error(y_true, y_pred)
    metrics = {
        'mse': squared_error,
        'rmse': np.sqrt(squared_error),  # RMSE is derived from the MSE above
        'mae': mean_absolute_error(y_true, y_pred),
        'r2': r2_score(y_true, y_pred),
    }

    report_lines = (
        f"\n{model_name} Results:",
        f"Mean Squared Error: {metrics['mse']:.4f}",
        f"Root Mean Squared Error: {metrics['rmse']:.4f}",
        f"Mean Absolute Error: {metrics['mae']:.4f}",
        f"R-squared Score: {metrics['r2']:.4f}",
    )
    for line in report_lines:
        print(line)

    return metrics

In [None]:
# 1. Standard Linear Regression
# Print a section banner so this model's output is easy to locate.
print("\n--- Standard Linear Regression ---")

In [None]:
# Create the ordinary least-squares model and fit it on the scaled training data
linear_reg = LinearRegression()
linear_reg.fit(X=X_train_scaled, y=y_train)

In [None]:
# Predict the target for the scaled test features with the linear model
y_pred_linear = linear_reg.predict(X=X_test_scaled)

In [None]:
# Score the linear model on the test split and keep the metrics for the summary
linear_metrics = evaluate_model(
    y_true=y_test,
    y_pred=y_pred_linear,
    model_name="Standard Linear Regression",
)

In [None]:
# 2. Polynomial Regression (Linear Regression with Basis Functions)
# Print a section banner so this model's output is easy to locate.
print("\n--- Polynomial Regression ---")

In [None]:
# Expand the scaled features with degree-2 polynomial terms (no bias column);
# the expansion is fitted on the training split and applied to both splits.
poly = PolynomialFeatures(degree=2, include_bias=False).fit(X_train_scaled)
X_train_poly = poly.transform(X_train_scaled)
X_test_poly = poly.transform(X_test_scaled)

In [None]:
# Create a linear model and fit it on the polynomial-expanded training features
poly_reg = LinearRegression()
poly_reg.fit(X=X_train_poly, y=y_train)

In [None]:
# Predict the target for the polynomial-expanded test features
y_pred_poly = poly_reg.predict(X=X_test_poly)

In [None]:
# Score the polynomial model on the test split and keep the metrics for the summary
poly_metrics = evaluate_model(
    y_true=y_test,
    y_pred=y_pred_poly,
    model_name="Polynomial Regression",
)

In [None]:
# Visualization functions
def plot_actual_vs_predicted(y_test, y_pred_linear, y_pred_poly):
    """
    Draw side-by-side scatter plots of actual vs. predicted values.

    The left panel shows the standard linear regression predictions and the
    right panel the polynomial regression predictions. Each panel also carries
    a dashed red diagonal spanning the range of y_test, i.e. the line on which
    perfect predictions would fall.
    """
    # (subplot position, predictions, panel title) for each model
    panels = (
        (121, y_pred_linear, "Standard Linear Regression"),
        (122, y_pred_poly, "Polynomial Regression"),
    )
    lowest, highest = y_test.min(), y_test.max()

    plt.figure(figsize=(15, 6))
    for position, predictions, title in panels:
        plt.subplot(position)
        plt.scatter(y_test, predictions, alpha=0.5)
        plt.plot([lowest, highest], [lowest, highest], 'r--', lw=2)
        plt.xlabel("Actual Values")
        plt.ylabel("Predicted Values")
        plt.title(title)

    plt.tight_layout()
    plt.show()

def plot_residuals(y_test, y_pred_linear, y_pred_poly):
    """
    Draw side-by-side residual plots for the two models.

    Each panel plots the residuals (y_test minus the model's predictions)
    against the predictions, with a dashed red horizontal line at zero.
    The left panel is the standard linear regression, the right panel the
    polynomial regression.
    """
    # (subplot position, predictions, panel title) for each model
    panels = (
        (121, y_pred_linear, "Residual Plot - Standard Linear Regression"),
        (122, y_pred_poly, "Residual Plot - Polynomial Regression"),
    )

    plt.figure(figsize=(15, 6))
    for position, predictions, title in panels:
        plt.subplot(position)
        residuals = y_test - predictions
        plt.scatter(predictions, residuals, alpha=0.5)
        plt.axhline(y=0, color='r', linestyle='--')
        plt.xlabel("Predicted Values")
        plt.ylabel("Residuals")
        plt.title(title)

    plt.tight_layout()
    plt.show()

In [None]:
# Draw the actual-vs-predicted and residual figures for both models
plot_actual_vs_predicted(
    y_test=y_test, y_pred_linear=y_pred_linear, y_pred_poly=y_pred_poly
)
plot_residuals(
    y_test=y_test, y_pred_linear=y_pred_linear, y_pred_poly=y_pred_poly
)

In [None]:
# Feature importance analysis
def plot_feature_importance(model, feature_names, model_name):
    """
    Show a horizontal bar chart of the model's largest coefficients.

    Importance is the absolute value of each entry of model.coef_, paired with
    the corresponding entry of feature_names. Features are ordered from most
    to least important and at most the first ten are drawn.
    """
    ranked = pd.DataFrame(
        {'feature': feature_names, 'importance': abs(model.coef_)}
    ).sort_values('importance', ascending=False)
    top_features = ranked.head(10)

    plt.figure(figsize=(12, 6))
    sns.barplot(data=top_features, x='importance', y='feature')
    plt.title(f"Top 10 Feature Importance - {model_name}")
    plt.tight_layout()
    plt.show()

In [None]:
# Plot feature importance for standard linear regression.
# X is a NumPy array (prepare_data returns `.values`) and has no `.columns`, so the
# feature names come from the DataFrame's columns with the target column removed.
plot_feature_importance(
    linear_reg,
    df.columns.drop('combined_data'),
    "Standard Linear Regression",
)

In [None]:
# For polynomial regression, get the expanded feature names and plot importance.
# X is a NumPy array (prepare_data returns `.values`) and has no `.columns`, so the
# input feature names come from the DataFrame's columns with the target removed.
poly_feature_names = poly.get_feature_names_out(df.columns.drop('combined_data'))
plot_feature_importance(poly_reg, poly_feature_names, "Polynomial Regression")

In [None]:
# Tabulate both models' metrics side by side and print the table
print("\n--- Model Comparison Summary ---")
comparison_df = pd.DataFrame({
    'Metric': ['MSE', 'RMSE', 'MAE', 'R-squared'],
    'Standard Linear Regression': [
        linear_metrics[key] for key in ('mse', 'rmse', 'mae', 'r2')
    ],
    'Polynomial Regression': [
        poly_metrics[key] for key in ('mse', 'rmse', 'mae', 'r2')
    ],
})
print(comparison_df)