In [None]:
def fetch_nasa_power_data(latitude, longitude, start_date, end_date, timeout=60):
    """
    Fetch daily point data from the NASA POWER API for one location.

    Requests the ALLSKY_SFC_SW_DWN, WS10M and T2M parameters for the "RE"
    community in JSON format.

    Args:
        latitude: Latitude of the point, forwarded as the "latitude" query parameter.
        longitude: Longitude of the point, forwarded as the "longitude" query parameter.
        start_date: First day of the range, forwarded as "start"
            (callers in this notebook pass "YYYYMMDD" strings).
        end_date: Last day of the range, forwarded as "end" (same format).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests waits indefinitely and one stalled call would
            hang the whole ingest loop.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.HTTPError: If the API answers with a 4xx/5xx status.
        requests.exceptions.Timeout: If no response arrives within `timeout`.
    """
    base_url = "https://power.larc.nasa.gov/api/temporal/daily/point"
    params = {
        "parameters": "ALLSKY_SFC_SW_DWN,WS10M,T2M",
        "community": "RE",
        "longitude": longitude,
        "latitude": latitude,
        "start": start_date,
        "end": end_date,
        "format": "JSON"
    }

    response = requests.get(base_url, params=params, timeout=timeout)

    # Fail loudly on an error status: otherwise the error payload is handed to
    # the caller as if it were data and silently produces zero rows downstream.
    response.raise_for_status()

    return response.json()


In [None]:
def convert_to_dataframe(data, lat, lon):
    """
    Flatten one API response into a tidy DataFrame, one row per date.

    Rows are driven by the dates present under ALLSKY_SFC_SW_DWN; WS10M and T2M
    are looked up per date and fall back to -999.0 when a date is absent.

    Args:
        data: Decoded JSON response (dict). Expected to carry the series under
            data["properties"]["parameter"][<PARAMETER>] as {"YYYYMMDD": value}.
        lat: Latitude written into every row.
        lon: Longitude written into every row.

    Returns:
        pd.DataFrame with columns latitude, longitude, date ("YYYY-MM-DD"
        string), allsky_sfc_sw_dwn, ws10m, t2m. The columns are present even
        when the payload holds no records, so the frame can always be appended
        to the SQL table.
    """
    columns = ["latitude", "longitude", "date", "allsky_sfc_sw_dwn", "ws10m", "t2m"]

    parameters = (data or {}).get('properties', {}).get('parameter', {})
    allsky = parameters.get('ALLSKY_SFC_SW_DWN', {})
    ws10m = parameters.get('WS10M', {})
    t2m = parameters.get('T2M', {})

    records = [
        {
            "latitude": lat,
            "longitude": lon,
            # "YYYYMMDD" -> "YYYY-MM-DD"
            "date": f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}",
            "allsky_sfc_sw_dwn": allsky_val,
            "ws10m": ws10m.get(date_str, -999.0),
            "t2m": t2m.get(date_str, -999.0),
        }
        for date_str, allsky_val in allsky.items()
    ]

    # Passing `columns` pins the schema; without it an empty `records` list
    # yields a frame with no columns at all.
    return pd.DataFrame(records, columns=columns)

In [None]:
import numpy as np
import requests
import pandas as pd
import sqlite3
import time



# --- Ingestion configuration -------------------------------------------------
# Grid bounds are inclusive: the loops below visit lat_min..lat_max and
# lon_min..lon_max in steps of lat_step / lon_step.
lat_min, lat_max, lat_step = 5, 55, 5
lon_min, lon_max, lon_step = -135, -90, 5

start = "20160901"
end = "20161231"
output_filename = "solar_radiation_data.csv"
TABLE_NAME = "solar_radiation"


conn = sqlite3.connect('solar_data.db')

# try/finally so the connection is released even if a fetch fails mid-grid.
try:
    # NOTE(review): rows are appended, so re-running this cell against an
    # existing solar_data.db duplicates every row. Delete the DB file (or drop
    # the table) before a fresh run.
    for lat in range(lat_min, lat_max + lat_step, lat_step):
        for lon in range(lon_min, lon_max + lon_step, lon_step):

            data = fetch_nasa_power_data(lat, lon, start, end)
            print("Successfully fetched data for this lat/lon", lat, lon)

            df = convert_to_dataframe(data, lat, lon)

            df.to_sql(TABLE_NAME, conn, if_exists="append", index=False)
            print(f"Persisted the rows for latitude:{lat}, longitude:{lon} to DB.")

    conn.commit()

    # Confirm the table really exists before exporting it.
    cur = conn.cursor()
    cur.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?;",
        (TABLE_NAME,)
    )
    table_exists = cur.fetchone() is not None

    if not table_exists:
        raise RuntimeError(f"Table {TABLE_NAME!r} was not created; nothing to export.")

    # TABLE_NAME is a constant defined above, not user input, so interpolating
    # it into the statement is safe here.
    big_df = pd.read_sql_query(f'SELECT * FROM "{TABLE_NAME}";', conn)
    big_df.to_csv(output_filename, index=False)
    print(f"\nSuccessfully saved all data from DB to {output_filename}.")
finally:
    conn.close()


In [None]:

import numpy as np
import pandas as pd

def load_and_clean_solar_data(csv_path: str, missing_sentinel: float = -999.0) -> pd.DataFrame:
    """
    Load the exported solar CSV and return a cleaned copy with calendar features.

    Steps:
      1. Parse "date"; rows whose date cannot be parsed are dropped.
      2. For allsky_sfc_sw_dwn, ws10m and t2m (whichever are present), treat
         `missing_sentinel` as missing and fill all missing values with the
         column median.
      3. Add year, month, day and day_of_year columns derived from "date".

    Args:
        csv_path: Path (or file-like object) passed to pd.read_csv.
        missing_sentinel: Placeholder value that marks a missing measurement.
            The ingestion step (`convert_to_dataframe`) writes -999.0 for dates
            with no value, so leaving it in place would both keep bogus
            readings and drag the median used for imputation.

    Returns:
        The cleaned DataFrame.
    """
    df = pd.read_csv(csv_path)

    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    # .copy() so the column assignments below act on an independent frame.
    df = df.dropna(subset=["date"]).copy()

    numeric_cols = ["allsky_sfc_sw_dwn", "ws10m", "t2m"]

    for col in numeric_cols:
        if col in df.columns:
            # Turn sentinel placeholders into NaN first so that the median is
            # computed from real measurements only.
            values = df[col].mask(df[col] == missing_sentinel)
            df[col] = values.fillna(values.median())

    df["year"] = df["date"].dt.year
    df["month"] = df["date"].dt.month
    df["day"] = df["date"].dt.day
    df["day_of_year"] = df["date"].dt.dayofyear

    return df

In [None]:
import pandas as pd


# Load the NASA POWER export and the ground-sensor readings.
df_nasa = pd.read_csv("/content/solar_radiation_data.csv")
df_sensor = pd.read_csv("/content/SolarPrediction.csv")


# Reduce both sides to plain calendar dates so they can be joined on "date".
df_nasa["date"] = pd.to_datetime(df_nasa["date"]).dt.date

# The sensor file stores its timestamp as text in the "Data" column.
df_sensor["datetime"] = pd.to_datetime(
    df_sensor["Data"].str.strip(),
    format="%m/%d/%Y %I:%M:%S %p"
)
df_sensor["date"] = df_sensor["datetime"].dt.date


# NOTE(review): this takes whichever grid point is the first row of the CSV;
# confirm that it is the grid cell that should be paired with the sensor site.
target_lat = df_nasa["latitude"].iloc[0]
target_lon = df_nasa["longitude"].iloc[0]

# Keep a single NASA row per date. The ingest cell appends to the database on
# every run, so repeated rows are possible and would otherwise multiply the
# sensor rows in the left merge below.
df_nasa_site = (
    df_nasa[
        (df_nasa["latitude"] == target_lat) &
        (df_nasa["longitude"] == target_lon)
    ]
    .drop_duplicates(subset="date")
    .copy()
)

# Left merge: every sensor reading is kept and gains that day's NASA values.
merged_df = pd.merge(df_sensor, df_nasa_site, on="date", how="left")


merged_df.to_csv("merged_df.csv", index=False)


print(merged_df.head())

print(len(merged_df))


In [None]:

from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
X_train_reg = X_train_scaled
X_test_reg = X_test_scaled
y_train = Y_train.iloc[:, 0]
y_test = Y_test.iloc[:, 0]


def _fit_and_report(label, model, X_tr, y_tr, X_te, y_te):
    """Fit `model`, print RMSE and R2 for both splits, and return the results.

    Returns a tuple:
        (train_predictions, test_predictions, train_mse, test_mse, train_r2, test_r2)

    Having a single code path for every model keeps the train/test metrics from
    being mixed up between models or between splits.
    """
    model.fit(X_tr, y_tr)

    pred_tr = model.predict(X_tr)
    pred_te = model.predict(X_te)

    mse_tr = mean_squared_error(y_tr, pred_tr)
    mse_te = mean_squared_error(y_te, pred_te)
    r2_tr = r2_score(y_tr, pred_tr)
    r2_te = r2_score(y_te, pred_te)

    print(f"Below are the outputs for {label}")
    # The reported "error" is the RMSE (square root of the MSE).
    print("Train error: ", np.sqrt(mse_tr))
    print("Test error: ", np.sqrt(mse_te))
    print("Train R2: ", r2_tr)
    print("Test R2: ", r2_te)

    return pred_tr, pred_te, mse_tr, mse_te, r2_tr, r2_te


# Decision tree regression
decision_tree_model = DecisionTreeRegressor(max_depth=7, random_state=84)
(
    decision_regression_prediction_training,
    decision_regression_prediction_testing,
    error_decision_train,
    error_decision_test,
    r2_decision_train,
    r2_decision_test,
) = _fit_and_report(
    "decision tree regressor", decision_tree_model, X_train, y_train, X_test, y_test
)

# Random forest regression
# NOTE(review): n_estimators=1 builds a single tree — confirm this is intended.
forest_tree_model = RandomForestRegressor(n_estimators=1, random_state=42)
(
    forest_regression_prediction_training,
    forest_regression_prediction_testing,
    error_forest_train,
    error_forest_test,
    r2_forest_train,
    r2_forest_test,
) = _fit_and_report(
    "random forest regressor", forest_tree_model, X_train, y_train, X_test, y_test
)

# XGBoost regression
# NOTE(review): a single boosting round at learning_rate=.01 — confirm this is intended.
xgb_tree_model = XGBRegressor(n_estimators=1, learning_rate=.01)
(
    xgb_regression_prediction_training,
    xgb_regression_prediction_testing,
    error_xgb_train,
    error_xgb_test,
    r2_xgb_train,
    r2_xgb_test,
) = _fit_and_report(
    "xgb tree regressor", xgb_tree_model, X_train, y_train, X_test, y_test
)

# Graphing the random forest, which we deemed best
fig, axes = plt.subplots(1, 3, figsize=(20, 5))

# Panel 1: actual vs predicted on the held-out set. The dashed diagonal marks
# perfect predictions and spans the same y_test range as the plotted points.
ax_fit = axes[0]
ax_fit.scatter(y_test, forest_regression_prediction_testing, alpha=.5)
ax_fit.plot([y_test.min(), y_test.max()], [y_test.min(), y_test.max()], 'r--', lw=2)
ax_fit.set_xlabel("Actual values")
ax_fit.set_ylabel("Predicted values")
ax_fit.set_title("Actual vs Predicted Values for Forest Regression")

# Panel 2: residuals vs predictions. The reference for residuals is the
# horizontal zero line, not the y = x diagonal.
residuals = y_test - forest_regression_prediction_testing
ax_res = axes[1]
ax_res.scatter(forest_regression_prediction_testing, residuals, alpha=.5)
ax_res.axhline(0, color='r', linestyle='--', lw=2)
ax_res.set_xlabel("Predicted values")
ax_res.set_ylabel("Residuals")
ax_res.set_title("Residual Plot for Random Forest")

# Panel 3: feature importances of the fitted forest.
coef_values = forest_tree_model.feature_importances_
coefficients = forest_tree_model.feature_importances_
# Label count is taken from the importances themselves so it always matches
# the data the model was actually fit on.
features = [f"feature_{i}" for i in range(len(coefficients))]
coef_series = pd.Series(coefficients, index=features)
# Just to make it more viewable in the Colab UI, only the first 8 features are shown.
coef_series_shortened = coef_series.head(8)
ax_imp = axes[2]
coef_series_shortened.plot(kind='barh', ax=ax_imp)
ax_imp.set_title("Feature Importance of Forest Model")
ax_imp.set_xlabel("Importance")

# Lay out only after every title and label is in place.
fig.tight_layout()
plt.show()

