In [1]:
import requests
import pandas as pd
import math

def air_density(temp_c, rh, pressure=101325):
    """Return the density of moist air in kg/m³.

    The air is treated as an ideal-gas mixture of dry air and water vapour:
    each component's partial pressure is divided by its own specific gas
    constant times the absolute temperature, and the two densities are summed.

    Args:
        temp_c: Air temperature in degrees Celsius.
        rh: Relative humidity in percent (0-100).
        pressure: Total air pressure in Pa.

    Returns:
        Air density in kg/m³.
    """
    # Specific gas constants, J/(kg*K).
    gas_const_dry = 287.058
    gas_const_vapor = 461.495

    kelvin = temp_c + 273.15

    # Magnus-type saturation vapour pressure; the formula yields hPa, hence * 100 for Pa.
    magnus_exponent = (17.67 * temp_c) / (temp_c + 243.5)
    saturation_pa = 6.112 * math.exp(magnus_exponent) * 100

    # Partial pressure of the water vapour actually present.
    vapor_pa = rh / 100.0 * saturation_pa

    dry_part = (pressure - vapor_pa) / (gas_const_dry * kelvin)
    vapor_part = vapor_pa / (gas_const_vapor * kelvin)
    return dry_part + vapor_part

def fetch_dataset(lat, lon, start="2013", end="2023", output_file="wind_dataset.csv"):
    """Download daily NASA POWER weather data for one point and derive turbine power.

    Requests the WS10M, T2M and RH2M parameters from the NASA POWER daily point
    API for 1 January of ``start`` through 31 December of ``end``, computes the
    air density for every day with ``air_density`` and an idealised power
    output, writes the result to ``output_file`` as CSV and returns it.

    Days for which any of the three parameters carries the API's fill value
    (missing data) are dropped, because a sentinel such as -999 would otherwise
    be fed into the density and power formulas as if it were a measurement.

    Args:
        lat: Latitude of the site.
        lon: Longitude of the site.
        start: First year (YYYY) of the requested range.
        end: Last year (YYYY) of the requested range.
        output_file: Path of the CSV file to write.

    Returns:
        DataFrame with the columns ``Date``, ``Wind_Speed(m/s)``,
        ``Temperature(C)``, ``Humidity(%)`` and ``Power_Output(kW)``.

    Raises:
        ValueError: If the response does not contain ``properties.parameter``.
        requests.RequestException: If the request fails or times out.
    """
    url = (
        f"https://power.larc.nasa.gov/api/temporal/daily/point"
        f"?parameters=WS10M,T2M,RH2M"
        f"&community=RE"
        f"&longitude={lon}"
        f"&latitude={lat}"
        f"&start={start}0101"
        f"&end={end}1231"
        f"&format=JSON"
    )

    # Without a timeout a stalled connection would hang the notebook indefinitely.
    response = requests.get(url, timeout=60)
    data = response.json()

    if "properties" not in data or "parameter" not in data["properties"]:
        raise ValueError("No parameter data returned. Check API response.")

    params = data["properties"]["parameter"]
    # Sentinel the API uses for missing values; fall back to -999 if the header is absent.
    fill_value = (data.get("header") or {}).get("fill_value", -999)

    # Convert nested dict {parameter: {YYYYMMDD: value}} -> DataFrame with a Date column
    df = pd.DataFrame(params).rename_axis("Date").reset_index()

    # Convert YYYYMMDD -> datetime
    df["Date"] = pd.to_datetime(df["Date"], format="%Y%m%d")

    # Ensure numeric, then discard days with missing (fill-value) inputs
    weather_cols = ["WS10M", "T2M", "RH2M"]
    df = df.astype({col: float for col in weather_cols})
    has_fill = df[weather_cols].eq(fill_value).any(axis=1)
    df = df.loc[~has_fill].reset_index(drop=True)

    # Air density, one scalar call per day (also works when no rows remain)
    df["Air_Density"] = [
        air_density(temp, humidity)
        for temp, humidity in zip(df["T2M"], df["RH2M"])
    ]

    # Turbine parameters (example: 2 MW, rotor diameter 80m)
    R = 40
    A = math.pi * R**2
    Cp = 0.45
    eta = 0.9

    # P = 0.5 * rho * A * v^3 * Cp * eta, converted from W to kW.
    # No cut-in, rated-power or cut-out limits are applied.
    df["Power_Output_kW"] = 0.5 * df["Air_Density"] * A * (df["WS10M"]**3) * Cp * eta / 1000

    # Keep only needed columns; rename() returns a new frame, so no slice is mutated in place
    result = df[["Date", "WS10M", "T2M", "RH2M", "Power_Output_kW"]].rename(columns={
        "WS10M": "Wind_Speed(m/s)",
        "T2M": "Temperature(C)",
        "RH2M": "Humidity(%)",
        "Power_Output_kW": "Power_Output(kW)"
    })

    # Save CSV
    result.to_csv(output_file, index=False)
    print(f"✅ Dataset saved to {output_file} with {len(result)} rows.")
    return result


# Example run for Bangalore, Karnataka: fetch 2013-2023 and preview the first rows.
dataset = fetch_dataset(lat=12.9716, lon=77.5946, start="2013", end="2023")
print(dataset.head())

✅ Dataset saved to wind_dataset.csv with 4017 rows.
        Date  Wind_Speed(m/s)  Temperature(C)  Humidity(%)  Power_Output(kW)
0 2013-01-01             3.69           23.45        66.15         60.429057
1 2013-01-02             3.10           22.50        57.25         35.992715
2 2013-01-03             2.77           22.66        59.36         25.657505
3 2013-01-04             2.77           24.19        61.64         25.503745
4 2013-01-05             2.86           24.48        60.59         28.043921


In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor,BaggingRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, ShuffleSplit, cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

In [3]:
import warnings

In [4]:
# Reload the CSV written by fetch_dataset() and preview the first rows.
data = pd.read_csv(filepath_or_buffer='wind_dataset.csv')
data.head()

Unnamed: 0,Date,Wind_Speed(m/s),Temperature(C),Humidity(%),Power_Output(kW)
0,2013-01-01,3.69,23.45,66.15,60.429057
1,2013-01-02,3.1,22.5,57.25,35.992715
2,2013-01-03,2.77,22.66,59.36,25.657505
3,2013-01-04,2.77,24.19,61.64,25.503745
4,2013-01-05,2.86,24.48,60.59,28.043921


In [5]:
# Count missing values per column before modelling.
data.isna().sum()

Date                0
Wind_Speed(m/s)     0
Temperature(C)      0
Humidity(%)         0
Power_Output(kW)    0
dtype: int64

In [6]:
# Model inputs: every column except the date and the value being predicted.
feature = data.drop(columns=['Date', 'Power_Output(kW)'])
feature.head()

Unnamed: 0,Wind_Speed(m/s),Temperature(C),Humidity(%)
0,3.69,23.45,66.15
1,3.1,22.5,57.25
2,2.77,22.66,59.36
3,2.77,24.19,61.64
4,2.86,24.48,60.59


In [7]:
# Prediction target, kept as a one-column DataFrame.
target = data[['Power_Output(kW)']].copy()
target.head()

Unnamed: 0,Power_Output(kW)
0,60.429057
1,35.992715
2,25.657505
3,25.503745
4,28.043921


# Model selection

## Using ShuffleSplit cross-validation

In [8]:
# Six random train/test splits. The fixed seed makes every cross_val_score call
# below score on the same six splits, so the model comparison is like-for-like
# and reproducible after a kernel restart.
cv = ShuffleSplit(n_splits=6, random_state=42)

In [9]:
# Baseline: ordinary least squares, scored on each split with the estimator's default scorer.
warnings.filterwarnings(action='ignore')
model1 = LinearRegression()
cross_val_score(estimator=model1, X=feature, y=target, cv=cv)

array([0.84254984, 0.85015952, 0.84249861, 0.83826923, 0.84357118,
       0.84657929])

In [10]:
# Support vector regression with default hyperparameters on the raw features.
# NOTE(review): the recorded scores for this model are slightly negative.
warnings.filterwarnings(action='ignore')
model2 = SVR()
cross_val_score(estimator=model2, X=feature, y=target, cv=cv)

array([-0.03936473, -0.00877122, -0.02617824, -0.03300796, -0.03620152,
       -0.03081914])

In [11]:
# Single decision tree. The target was computed from these same three features
# in fetch_dataset(), so near-perfect scores here mean the tree recovered that formula.
model3 = DecisionTreeRegressor()
cross_val_score(estimator=model3, X=feature, y=target, cv=cv)

array([0.99960152, 0.99989143, 0.99978245, 0.99988887, 0.99977354,
       0.99963573])

In [12]:
# Random forest of 200 trees; the seed makes the bootstrap samples, and therefore
# the scores, repeatable between runs.
model4 = RandomForestRegressor(n_estimators=200, random_state=42)
warnings.filterwarnings('ignore')
cross_val_score(model4, feature, target, cv=cv)

array([0.99963257, 0.99967101, 0.99996034, 0.99995771, 0.99995387,
       0.99926307])

In [13]:
# k-nearest-neighbours regression using the 5 closest training samples.
warnings.filterwarnings(action='ignore')
model5 = KNeighborsRegressor(n_neighbors=5)
cross_val_score(estimator=model5, X=feature, y=target, cv=cv)

array([0.95019536, 0.9640346 , 0.96294908, 0.97575677, 0.97291519,
       0.95500785])

In [14]:
# Bagging ensemble of 100 linear regressions (model1 is used as the template estimator).
warnings.filterwarnings(action='ignore')
model6 = BaggingRegressor(estimator=model1, n_estimators=100)
m1 = cross_val_score(estimator=model6, X=feature, y=target, cv=cv)
m1

array([0.8133993 , 0.84073415, 0.83561605, 0.86873405, 0.84638038,
       0.8281553 ])

In [15]:
# Bagging ensemble of 100 support vector regressors (model2 is the template estimator).
warnings.filterwarnings(action='ignore')
model7 = BaggingRegressor(estimator=model2, n_estimators=100)
m2 = cross_val_score(estimator=model7, X=feature, y=target, cv=cv)
m2

array([-0.03947823, -0.0189219 , -0.05476993, -0.01396862, -0.04234882,
       -0.03932056])

In [16]:
# Bagging ensemble of 100 decision trees (model3 is the template estimator).
warnings.filterwarnings(action='ignore')
model8 = BaggingRegressor(estimator=model3, n_estimators=100)
m3 = cross_val_score(estimator=model8, X=feature, y=target, cv=cv)
m3

array([0.9999216 , 0.99995213, 0.99990232, 0.99977802, 0.99994892,
       0.99996523])

In [17]:
# Bagging 100 copies of the 200-tree forest means 20,000 trees per split.
# Run serially this was slow enough that the previous execution was interrupted,
# so the splits are evaluated in parallel on all available cores (n_jobs=-1).
model9 = BaggingRegressor(n_estimators=100, estimator=model4)
warnings.filterwarnings('ignore')
m4 = cross_val_score(model9, feature, target, cv=cv, n_jobs=-1)
m4


KeyboardInterrupt



In [None]:
# Bagging ensemble of 100 k-nearest-neighbours regressors (model5 is the template estimator).
warnings.filterwarnings(action='ignore')
model10 = BaggingRegressor(estimator=model5, n_estimators=100)
m5 = cross_val_score(estimator=model10, X=feature, y=target, cv=cv)
m5

## Using a single train_test_split hold-out

In [18]:
# Hold out 20% of the rows for testing. The fixed seed keeps the split, and so
# every score reported below, identical across kernel restarts.
x, xt, y, yt = train_test_split(feature, target, test_size=0.2, random_state=42)

In [21]:
# Linear regression: fit on the training split, then score on the held-out split.
model1 = LinearRegression().fit(x, y)
warnings.filterwarnings(action='ignore')
model1.score(X=xt, y=yt)

0.8317675720730695

In [22]:
# Support vector regression with default hyperparameters: fit, then score on the held-out split.
model2 = SVR().fit(x, y)
warnings.filterwarnings(action='ignore')
model2.score(X=xt, y=yt)

-0.03449899246552102

In [23]:
# Single decision tree: fit, then score on the held-out split.
model3 = DecisionTreeRegressor().fit(x, y)
warnings.filterwarnings(action='ignore')
model3.score(X=xt, y=yt)

0.9993785722870514

In [24]:
# Random forest of 200 trees. This fitted model is the one predict() uses, so it
# is seeded to make its predictions repeatable between runs.
model4 = RandomForestRegressor(n_estimators=200, random_state=42)
model4.fit(x, y)
warnings.filterwarnings('ignore')
model4.score(xt, yt)

0.9999199182865375

In [25]:
# k-nearest-neighbours regression (k=5): fit, then score on the held-out split.
model5 = KNeighborsRegressor(n_neighbors=5).fit(x, y)
warnings.filterwarnings(action='ignore')
model5.score(X=xt, y=yt)

0.957375267007804

In [26]:
# Bagging ensemble of 100 linear regressions: fit, then score on the held-out split.
model6 = BaggingRegressor(estimator=model1, n_estimators=100).fit(x, y)
warnings.filterwarnings(action='ignore')
model6.score(X=xt, y=yt)

0.8317567954451122

In [27]:
# Bagging ensemble of 100 support vector regressors: fit, then score on the held-out split.
model7 = BaggingRegressor(estimator=model2, n_estimators=100).fit(x, y)
warnings.filterwarnings(action='ignore')
model7.score(X=xt, y=yt)

-0.03567792205439635

In [28]:
# Bagging ensemble of 100 decision trees: fit, then score on the held-out split.
model8 = BaggingRegressor(estimator=model3, n_estimators=100).fit(x, y)
warnings.filterwarnings(action='ignore')
model8.score(X=xt, y=yt)

0.9998735421683861

In [29]:
# Bagging ensemble of 100 random forests (model4 as template): fit, then score on the held-out split.
model9 = BaggingRegressor(estimator=model4, n_estimators=100).fit(x, y)
warnings.filterwarnings(action='ignore')
model9.score(X=xt, y=yt)

0.9996433807995122

In [30]:
# Bagging ensemble of 100 k-nearest-neighbours regressors: fit, then score on the held-out split.
model10 = BaggingRegressor(estimator=model5, n_estimators=100).fit(x, y)
warnings.filterwarnings(action='ignore')
model10.score(X=xt, y=yt)

0.9593056601433755

# Predicting output

In [40]:
def predict():
    """Prompt for wind speed, temperature and humidity and predict the power output.

    Reads the three values from standard input and passes them as a single
    sample to the module-level ``model4``.

    Returns:
        The array returned by ``model4.predict`` for the entered sample.

    Raises:
        ValueError: If an entered value cannot be parsed as a number.
    """
    # float() rather than int(): the training features are decimal values and
    # int() raises ValueError on input such as "3.5".
    wind_speed = float(input("enter Wind_Speed(m/s): "))
    temperature = float(input("enter Temperature(C): "))
    humidity = float(input("enter Humidity(%): "))

    # Use the same column names and order as the training features, so the
    # sample is matched to the model's inputs by name instead of by position only.
    sample = pd.DataFrame(
        [[wind_speed, temperature, humidity]],
        columns=["Wind_Speed(m/s)", "Temperature(C)", "Humidity(%)"],
    )
    return model4.predict(sample)

In [43]:
# Run the cell below to predict the power output for values you enter.

In [41]:
# Prompts for wind speed, temperature and humidity, then shows model4's prediction.
predict()

enter Wind_Speed(m/s):  3
enter Temperature(C):  20
enter Humidity(%):  60


array([32.7897289])