In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Step 1: Read the solar-radiation measurements from CSV and preview the first rows
csv_path = "SolarPrediction.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,UNIXTime,Data,Time,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,TimeSunRise,TimeSunSet
0,1475229326,9/29/2016 12:00:00 AM,23:55:26,1.21,48,30.46,59,177.39,5.62,06:13:00,18:13:00
1,1475229023,9/29/2016 12:00:00 AM,23:50:23,1.21,48,30.46,58,176.78,3.37,06:13:00,18:13:00
2,1475228726,9/29/2016 12:00:00 AM,23:45:26,1.23,48,30.46,57,158.75,3.37,06:13:00,18:13:00
3,1475228421,9/29/2016 12:00:00 AM,23:40:21,1.21,48,30.46,60,137.71,3.37,06:13:00,18:13:00
4,1475228124,9/29/2016 12:00:00 AM,23:35:24,1.17,48,30.46,62,104.95,5.62,06:13:00,18:13:00


In [None]:
# Print each column's dtype and non-null count, plus the frame's memory usage
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32686 entries, 0 to 32685
Data columns (total 11 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   UNIXTime                32686 non-null  int64  
 1   Data                    32686 non-null  object 
 2   Time                    32686 non-null  object 
 3   Radiation               32686 non-null  float64
 4   Temperature             32686 non-null  int64  
 5   Pressure                32686 non-null  float64
 6   Humidity                32686 non-null  int64  
 7   WindDirection(Degrees)  32686 non-null  float64
 8   Speed                   32686 non-null  float64
 9   TimeSunRise             32686 non-null  object 
 10  TimeSunSet              32686 non-null  object 
dtypes: float64(4), int64(3), object(4)
memory usage: 2.7+ MB


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

Unnamed: 0,UNIXTime,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed
count,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0,32686.0
mean,1478047000.0,207.124697,51.103255,30.422879,75.016307,143.489821,6.243869
std,3005037.0,315.916387,6.201157,0.054673,25.990219,83.1675,3.490474
min,1472724000.0,1.11,34.0,30.19,8.0,0.09,0.0
25%,1475546000.0,1.23,46.0,30.4,56.0,82.2275,3.37
50%,1478026000.0,2.66,50.0,30.43,85.0,147.7,5.62
75%,1480480000.0,354.235,55.0,30.46,97.0,179.31,7.87
max,1483265000.0,1601.26,71.0,30.56,103.0,359.95,40.5


In [None]:
# Count the missing values in every column
df.isna().sum()

Unnamed: 0,0
UNIXTime,0
Data,0
Time,0
Radiation,0
Temperature,0
Pressure,0
Humidity,0
WindDirection(Degrees),0
Speed,0
TimeSunRise,0


In [None]:
# Convert the UNIX timestamp (seconds since the epoch) to a datetime.
# pd.to_datetime(..., unit='s') yields UTC wall-clock time, but the recorded
# local `Time` column is 10 hours behind it (first row: UNIXTime 1475229326 is
# 2016-09-30 09:55:26 UTC, while `Data`/`Time` read 9/29/2016 23:55:26).
# Shift to local time before deriving features; otherwise Hour does not line
# up with the solar day and rows near midnight get the wrong Month/Year.
# NOTE(review): the offset is inferred from the previewed rows and assumed to
# be constant for the whole file - confirm against the full `Time` column.
LOCAL_UTC_OFFSET = pd.Timedelta(hours=-10)
df['datetime'] = pd.to_datetime(df['UNIXTime'], unit='s') + LOCAL_UTC_OFFSET

# Extract time-based features from the local timestamp
df['Hour'] = df['datetime'].dt.hour
df['Month'] = df['datetime'].dt.month
df['Year'] = df['datetime'].dt.year
df.head()

Unnamed: 0,UNIXTime,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,Hour,Month,Year,datetime
0,1475229326,1.21,48,30.46,59,177.39,5.62,9,9,2016,2016-09-30 09:55:26
1,1475229023,1.21,48,30.46,58,176.78,3.37,9,9,2016,2016-09-30 09:50:23
2,1475228726,1.23,48,30.46,57,158.75,3.37,9,9,2016,2016-09-30 09:45:26
3,1475228421,1.21,48,30.46,60,137.71,3.37,9,9,2016,2016-09-30 09:40:21
4,1475228124,1.17,48,30.46,62,104.95,5.62,9,9,2016,2016-09-30 09:35:24


In [None]:
# Drop the text date/time columns and the intermediate `datetime` column,
# leaving only numeric columns in the frame
unused_columns = ['Time', 'Data', 'TimeSunRise', 'TimeSunSet', 'datetime']
df = df.drop(unused_columns, axis="columns")
df.head()

Unnamed: 0,UNIXTime,Radiation,Temperature,Pressure,Humidity,WindDirection(Degrees),Speed,Hour,Month,Year
0,1475229326,1.21,48,30.46,59,177.39,5.62,9,9,2016
1,1475229023,1.21,48,30.46,58,176.78,3.37,9,9,2016
2,1475228726,1.23,48,30.46,57,158.75,3.37,9,9,2016
3,1475228421,1.21,48,30.46,60,137.71,3.37,9,9,2016
4,1475228124,1.17,48,30.46,62,104.95,5.62,9,9,2016


In [None]:
# Separate the prediction target (Radiation) from the predictor columns
from sklearn.model_selection import train_test_split
y = df['Radiation']
x = df.drop('Radiation', axis=1)

In [None]:
# Step 4: Hold out 20% of the rows for testing (80% train); the fixed
# random_state keeps the shuffle reproducible between runs
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Step 5: Standardize the features. The scaler is fitted on the training
# split only and then applied to both splits, so no test-set statistics
# leak into training.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(xtrain)
xtrain = scaler.transform(xtrain)
xtest = scaler.transform(xtest)

In [None]:
# Step 6: Fit a linear regression on the scaled training split
from sklearn.linear_model import LinearRegression
model = LinearRegression().fit(xtrain, ytrain)
# fit() returns the estimator itself; keep it as the cell's displayed value
model

In [None]:
# Step 7: Make predictions
# Predict the target for the held-out (already scaled) test features
ypred = model.predict(xtest)


In [None]:
# Step 8: Score the predictions against the held-out targets
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
mae, mse, r2 = (
    score(ytest, ypred)
    for score in (mean_absolute_error, mean_squared_error, r2_score)
)

# Report each metric on its own line
for label, value in (
    ("Mean Absolute Error:", mae),
    ("Mean Squared Error:", mse),
    ("R-squared Score:", r2),
):
    print(label, value)

Mean Absolute Error: 124.07594377517458
Mean Squared Error: 27764.02888465929
R-squared Score: 0.7217877935836704
