In [1]:
import numpy as np
import pandas as pd
import statsmodels.api as sm


In [7]:
# Toy dataset: three predictors and one response, five observations each.
# NOTE: Independent2 is Independent1 + 1 and Independent3 is Independent1 + 4,
# so the predictors are perfectly collinear; individual slopes fitted on this
# data are not uniquely determined.
data = dict(
    Independent1=list(range(1, 6)),
    Independent2=list(range(2, 7)),
    Independent3=list(range(5, 10)),
    dependent=[1, 2, 1.5, 3.5, 2],
)

# One column per dictionary key, one row per observation.
df = pd.DataFrame.from_dict(data)


In [8]:
# Display the example frame so the values can be checked by eye before modelling.
df

Unnamed: 0,Independent1,Independent2,Independent3,dependent
0,1,2,5,1.0
1,2,3,6,2.0
2,3,4,7,1.5
3,4,5,8,3.5
4,5,6,9,2.0


In [9]:
# Split the frame into the design columns (X) and the response series (Y).
predictor_columns = ['Independent1', 'Independent2', 'Independent3']
X = df.loc[:, predictor_columns]  # Independent variables
Y = df.loc[:, 'dependent']  # Dependent variable


In [10]:
# statsmodels' OLS does not add an intercept by itself, so prepend a column of
# ones named 'const'. has_constant='skip' (the library default, spelled out
# here) leaves X untouched if a constant column is already present, which keeps
# this cell safe to re-run.
X = sm.add_constant(X, prepend=True, has_constant='skip')


In [13]:
# Specify the ordinary-least-squares model (response first, design matrix
# second), estimate it, and print the full results table.
ols_spec = sm.OLS(endog=Y, exog=X)
model = ols_spec.fit()
print(model.summary())


                            OLS Regression Results                            
Dep. Variable:              dependent   R-squared:                       0.350
Model:                            OLS   Adj. R-squared:                  0.133
Method:                 Least Squares   F-statistic:                     1.615
Date:                Mon, 19 Aug 2024   Prob (F-statistic):              0.293
Time:                        06:43:49   Log-Likelihood:                -5.1260
No. Observations:                   5   AIC:                             14.25
Df Residuals:                       3   BIC:                             13.47
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const            0.0379      0.139      0.273   

  warn("omni_normtest is not valid with less than 8 observations; %i "


METHOD TWO: USING SCIKIT-LEARN

In [14]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


In [15]:
# Toy dataset for the scikit-learn walkthrough: three predictors, one response.
# NOTE: X2 is X1 + 1 and X3 is X1 + 4, so the predictors are perfectly
# collinear; individual coefficients fitted on this data are not uniquely
# determined.
data = dict(
    X1=list(range(1, 6)),
    X2=list(range(2, 7)),
    X3=list(range(5, 10)),
    Y=[1, 2, 1.5, 3.5, 2],
)

# One column per dictionary key, one row per observation.
df = pd.DataFrame.from_dict(data)


In [16]:
# Split the frame into the feature matrix (X) and the target series (Y).
feature_columns = ['X1', 'X2', 'X3']
X = df.loc[:, feature_columns]  # Independent variables
Y = df.loc[:, 'Y']  # Dependent variable


In [17]:
# Least-squares linear model; scikit-learn estimates the intercept itself
# (fit_intercept=True is the default, written out for clarity), so no constant
# column is added to X here.
model = LinearRegression(fit_intercept=True)
model.fit(X=X, y=Y)


In [18]:
# In-sample predictions: the model is evaluated on the same X it was fitted on.
Y_pred = model.predict(X)


In [19]:
# Report the fitted parameters, then the in-sample error and goodness of fit.
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)

mse = mean_squared_error(Y, Y_pred)
print('Mean squared error (MSE): %.2f' % mse)

r_squared = r2_score(Y, Y_pred)
print('Coefficient of determination (R^2): %.2f' % r_squared)


# How to read the output:
# - Coefficients: the estimated effect of each independent variable on the dependent variable.
# - Intercept: the expected value of Y when every X variable equals zero.
# - R^2: the share of the variance in the dependent variable explained by the independent variables.
# - MSE: the average squared difference between observed and predicted values.

Coefficients: [0.11666667 0.11666667 0.11666667]
Intercept: 0.36666666666666603
Mean squared error (MSE): 0.45
Coefficient of determination (R^2): 0.35


APPLYING THE SAME WORKFLOW TO NETCDF DATA

In [None]:
import xarray as xr

# Open the NetCDF file as an xarray Dataset.
netcdf_path = 'data.nc'
data = xr.open_dataset(netcdf_path)

# Show the dataset's structure (dimensions, coordinates, variables, attributes).
print(data)


In [None]:
import pandas as pd

# Pull each variable out of the Dataset as a NumPy array.
X1 = data['X1'].values
X2 = data['X2'].values
X3 = data['X3'].values
Y = data['Y'].values

# Collapse any multi-dimensional arrays to 1-D so each element is one sample.
# These *_flat arrays are the raw flattened values (missing values included).
X1_flat = X1.flatten()
X2_flat = X2.flatten()
X3_flat = X3.flatten()
Y_flat = Y.flatten()

# Every variable must contribute the same number of samples, otherwise rows of
# X and Y would not line up. Fail early with a message naming the sizes.
sizes = {
    'X1': X1_flat.size,
    'X2': X2_flat.size,
    'X3': X3_flat.size,
    'Y': Y_flat.size,
}
if len(set(sizes.values())) > 1:
    raise ValueError(f'Variables have different numbers of elements: {sizes}')

# Keep predictors and response side by side so incomplete samples can be
# removed from both at once.
samples = pd.DataFrame({
    'X1': X1_flat,
    'X2': X2_flat,
    'X3': X3_flat,
    'Y': Y_flat,
})

# NetCDF variables often contain missing values (xarray decodes fill values to
# NaN by default), and neither statsmodels' OLS nor scikit-learn accepts NaN.
# Drop any sample with a missing predictor or response; when nothing is
# missing this leaves the data unchanged.
samples = samples.dropna().reset_index(drop=True)

# Final modelling inputs: X is the predictor frame, Y_flat the matching
# response array (same length and row order as X).
X = samples[['X1', 'X2', 'X3']]
Y_flat = samples['Y'].to_numpy()


In [None]:
# Multiple linear regression on the NetCDF samples with statsmodels

import statsmodels.api as sm

# OLS needs an explicit intercept column; has_constant='skip' (the library
# default, spelled out here) avoids adding a second one if the cell is re-run.
X = sm.add_constant(X, prepend=True, has_constant='skip')

# Estimate the model: response first, design matrix second.
ols_spec = sm.OLS(endog=Y_flat, exog=X)
model = ols_spec.fit()

# Full results table (coefficients, standard errors, fit statistics).
print(model.summary())


USING SCIKIT-LEARN

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# The statsmodels cell above replaces X with a version that carries an extra
# 'const' column of ones. LinearRegression estimates its own intercept, so that
# column is redundant here and would show up as a spurious extra entry in
# model.coef_. Drop it if present; errors='ignore' keeps this cell working when
# it is run without the statsmodels cell.
features = X.drop(columns=['const'], errors='ignore')

# Initialize the model
model = LinearRegression()

# Fit the model on the real predictors only
model.fit(features, Y_flat)

# Make in-sample predictions (same rows the model was fitted on)
Y_pred = model.predict(features)

# Evaluate the model; coefficients are in the column order of `features`
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)
print('Mean squared error (MSE): %.2f' % mean_squared_error(Y_flat, Y_pred))
print('Coefficient of determination (R^2): %.2f' % r2_score(Y_flat, Y_pred))
