<a href="https://colab.research.google.com/github/cedamusk/AI-N-ML/blob/main/Linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import matplotlib.pyplot as plt
from scipy import stats

In [None]:
# Load the dataset into a DataFrame. The path points at /content, which is
# presumably the Colab runtime's working directory (the notebook carries an
# "Open In Colab" badge) — the CSV must exist there before this cell runs.
data=pd.read_csv('/content/synthetic_renewable_energy_analysis.csv')

In [None]:
# Sanity-check the loaded frame: show the first rows, then the column labels.
# The analysis function later indexes the frame by exact column label
# ('Country', 'Renewable_Energy_Share (%)', 'GDP_Growth_Rate (%)'), so this
# listing is where a mismatch would be spotted.
print("First few rows:")
print(data.head())
print("\nColumn names:")
print(data.columns)

In [None]:
def analyze_renewable_gdp_correlation(data):
  """Fit a per-country linear regression of GDP growth on renewable share.

  For every distinct value in ``data['Country']`` a ``LinearRegression`` is
  fitted with ``Renewable_Energy_Share (%)`` as the single predictor and
  ``GDP_Growth_Rate (%)`` as the target. The model is evaluated on the same
  rows it was fitted on, so every metric below is an in-sample figure.

  Args:
    data: DataFrame containing the columns ``Country``,
      ``Renewable_Energy_Share (%)`` and ``GDP_Growth_Rate (%)``.

  Returns:
    dict mapping each country to a dict with the keys:
      ``slope``, ``intercept``      - fitted line parameters;
      ``mae``, ``mse``, ``rmse``    - error metrics (``rmse == sqrt(mse)``);
      ``r_squared``                 - ``model.score`` on the fitted rows;
      ``adjusted_r_squared``        - NaN when the country has too few rows
                                      for the correction to be defined;
      ``correlation_coef``, ``p_value`` - Pearson correlation and its
                                      p-value (NaN for a single-row group);
      ``data``                      - dict with the arrays ``X`` (n, 1),
                                      ``y`` and ``y_pred`` used for plotting.
  """
  feature_col='Renewable_Energy_Share (%)'
  target_col='GDP_Growth_Rate (%)'
  results={}

  for country in data['Country'].unique():
    country_data=data[data['Country']==country]

    # scikit-learn expects a 2-D feature matrix, hence the (n, 1) reshape.
    X=country_data[feature_col].values.reshape(-1,1)
    y=country_data[target_col].values

    model=LinearRegression()
    model.fit(X, y)

    y_pred=model.predict(X)

    mae=mean_absolute_error(y, y_pred)
    # Keep the MSE untouched and derive the RMSE from it; previously the MSE
    # was overwritten by its own square root before the RMSE was computed.
    mse=mean_squared_error(y, y_pred)
    rmse=np.sqrt(mse)
    r_squared=model.score(X, y)

    # Adjusted R^2 divides by (n - p - 1); with n <= p + 1 rows that is zero
    # or negative and the statistic is undefined, so report NaN instead.
    n_samples, n_features=X.shape
    residual_dof=n_samples-n_features-1
    if residual_dof>0:
      adjusted_r_squared=1-(1-r_squared)*(n_samples-1)/residual_dof
    else:
      adjusted_r_squared=np.nan

    # pearsonr needs at least two observations.
    if n_samples>=2:
      correlation_coef, p_value=stats.pearsonr(
          country_data[feature_col],
          country_data[target_col]
      )
    else:
      correlation_coef, p_value=np.nan, np.nan

    results[country]={
        'slope':model.coef_[0],
        'intercept': model.intercept_,
        'mae':mae,
        'mse':mse,
        'rmse':rmse,
        'r_squared': r_squared,
        'adjusted_r_squared': adjusted_r_squared,
        'correlation_coef': correlation_coef,
        'p_value': p_value,
        'data':{
            'X':X,
            'y':y,
            'y_pred': y_pred
        }

    }

  return results

In [None]:
def plot_comparison(data, results):
  """Plot observed GDP growth against the fitted regression line per country.

  For each country in ``results`` the observed points are drawn as a scatter
  and the model's predictions as a dashed line in the same colour, so the fit
  can be compared with the data it was trained on.

  Args:
    data: The source DataFrame. Not read by this function; kept so existing
      callers that pass it keep working.
    results: Mapping of country -> result dict whose ``'data'`` entry holds
      the arrays ``'X'``, ``'y'`` and ``'y_pred'`` (the structure produced by
      ``analyze_renewable_gdp_correlation``).

  Returns:
    The ``matplotlib.pyplot`` module, with the new figure as current figure,
    so the caller can invoke ``.show()`` or ``.savefig()`` on it.
  """
  plt.figure(figsize=(12, 6))

  colors=['blue', 'green']

  for i, (country, country_results) in enumerate(results.items()):
    # Cycle through the palette so countries beyond the second are still
    # drawn; zipping against the two-colour list used to drop them silently.
    color=colors[i % len(colors)]

    fitted=country_results['data']
    x=np.asarray(fitted['X']).ravel()
    y=np.asarray(fitted['y'])
    y_pred=np.asarray(fitted['y_pred'])

    plt.scatter(
        x,
        y,
        color=color,
        label=f'{country}(Actual)'
    )

    # Sort by x so the dashed line is drawn once, left to right, instead of
    # retracing itself in row order (which would blur the dash pattern).
    order=np.argsort(x)
    plt.plot(
        x[order],
        y_pred[order],
        color=color,
        linestyle='--',
        label=f'{country}(Predicted)'
    )

  plt.xlabel('Renewable Energy Share (%)')
  plt.ylabel('GDP Growth Rate (%)')
  plt.title('Relationship between Renewable Energy Share and GDP Growth ')
  plt.legend()
  plt.grid(True, alpha=0.3)

  return plt

In [None]:
def print_detailed_metrics(results):
  """Print a plain-text metrics report for every country in ``results``.

  Args:
    results: Mapping of country -> dict holding the numeric entries
      ``slope``, ``intercept``, ``mae``, ``mse``, ``rmse``, ``r_squared``,
      ``adjusted_r_squared``, ``correlation_coef`` and ``p_value``.

  Each number is rendered with four decimal places. Nothing is returned;
  the report goes to standard output one line at a time.
  """
  # (template, key) pairs in print order; key None marks a heading line.
  layout=(
      ("Model Parameters:", None),
      ("Slope: {:.4f}", 'slope'),
      (" Intercept: {:.4f}", 'intercept'),
      ("\nError Metrics:", None),
      ("Mean Absolute Error (MAE): {:.4f}", 'mae'),
      ("Mean Squared Error (MSE): {:.4f}", 'mse'),
      (" Root Mean Squared Error (RMSE): {:.4f}", 'rmse'),
      ("\nGoodness of fit:", None),
      ("R-Squared:{:.4f}", 'r_squared'),
      ("Adjusted R-Squared: {:.4f}", 'adjusted_r_squared'),
      ("\nCorrelation Analysis:", None),
      ("Correlation coefficient: {:.4f}", 'correlation_coef'),
      ("P_value: {:.4f}", 'p_value'),
  )

  for name in results:
    metrics=results[name]
    print("\nDetailed metrics for {}:".format(name))
    print("-"* 40)
    for template, key in layout:
      if key is None:
        print(template)
      else:
        print(template.format(metrics[key]))

In [None]:
# End-to-end run: load the CSV, fit the per-country regressions, print the
# metrics report, then draw and show the comparison figure.
# NOTE(review): this re-reads the same file an earlier cell already loaded
# into `data`; the reload makes this cell self-contained but is otherwise
# redundant.
data=pd.read_csv('/content/synthetic_renewable_energy_analysis.csv')
results=analyze_renewable_gdp_correlation(data)
print_detailed_metrics(results)

# plot_comparison returns the pyplot module, so `plot` is just an alias of plt.
plot=plot_comparison(data, results)
plt.show()
