In [None]:
## Plotting each variable against the target variable
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats

def create_regression_dataset(
    csv_path='C:/Users/jess_besnier/Documents/Ireland_summer_2023/ML_aspect/input_and_discharge_actual_mock.csv',
    output_path='discharge_regression_data.csv',
):
    """Load the discharge data, save a cleaned copy, and plot each feature against discharge.

    The CSV at ``csv_path`` is read, reduced to the feature columns plus the
    ``bp_discharge`` target, given a parsed ``time`` column (from ``Timestamp``,
    format ``%m/%d/%Y``), and stripped of every row containing a missing value.
    The cleaned frame is written to ``output_path`` and a 2x3 grid of scatter
    plots (one per feature, each against ``bp_discharge``) is shown.

    For each feature a simple linear regression is fitted with
    ``scipy.stats.linregress``. When the regression p-value is below 0.05 the
    fitted line is drawn and its equation is annotated on the plot; otherwise
    the plot is annotated with 'No clear relationship'.

    Args:
        csv_path: Path of the input CSV. Must contain the columns
            ``Timestamp``, ``rain``, ``Tmax``, ``Tmin``, ``wdsp``, ``Rhmean``,
            ``solar_rad`` and ``bp_discharge``.
        output_path: Path the cleaned DataFrame is written to as CSV.

    Returns:
        A tuple ``(df_n, X, y)``: the cleaned DataFrame, the feature columns
        of that frame, and its ``bp_discharge`` column.
    """
    feature_cols = ['rain', 'Tmax', 'Tmin', 'wdsp', 'Rhmean', 'solar_rad']
    target_col = 'bp_discharge'

    data = pd.read_csv(csv_path)
    # Take an explicit copy: adding the 'time' column to a bare column
    # selection of `data` is chained assignment and triggers pandas'
    # SettingWithCopyWarning.
    df = data[['Timestamp'] + feature_cols + [target_col]].copy()
    df['time'] = pd.to_datetime(df['Timestamp'], format='%m/%d/%Y')
    df = df.drop('Timestamp', axis=1)
    df_n = df.dropna()
    df_n.to_csv(output_path)
    X = df_n[feature_cols]
    y = df_n[target_col]

    # Plotting each feature against the target variable; the 2x3 grid gives
    # one axis per entry in feature_cols.
    fig, axes = plt.subplots(nrows=2, ncols=3, figsize=(15, 10))
    fig.suptitle('Features vs. Discharge', fontsize=16)

    for ax, feature in zip(axes.flatten(), feature_cols):
        x = X[feature]
        ax.scatter(x, y)
        ax.set_xlabel(feature)
        ax.set_ylabel('Discharge')
        ax.grid(True)

        # Determine the relationship between feature and discharge.
        # r_value is the Pearson correlation coefficient of x and y, so it
        # does not need to be recomputed separately.
        slope, intercept, r_value, p_value, _std_err = stats.linregress(x, y)

        if p_value < 0.05:
            relationship = 'Linear'
            equation = f'y = {intercept:.2f} + {slope:.2f}x'
            # A straight line is fully determined by its two end points, so
            # draw it between the smallest and largest x only.
            x_ends = np.array([x.min(), x.max()])
            ax.plot(x_ends, intercept + slope * x_ends, color='red')
        else:
            relationship = 'No clear relationship'
            equation = 'N/A'

        ax.text(0.5, 0.9, f"Relationship: {relationship}\nCorrelation: {r_value:.2f}\nEquation: {equation}", transform=ax.transAxes, ha='center')

    plt.tight_layout()
    plt.show()

    return df_n, X, y

def create_datasets():
    """Build the regression dataset and return only its cleaned DataFrame.

    The feature matrix and target series that ``create_regression_dataset``
    also returns are discarded.
    """
    cleaned_frame, _features, _target = create_regression_dataset()
    return cleaned_frame

# Build the datasets only when this file is executed as a script, not on import.
if __name__ == '__main__':
    create_datasets()
