In [16]:
#add 3 new, features to our district-level ML dataset from our LSTM model
from tensorflow.keras.models import load_model
import numpy as np
import pandas as pd

model_ms = load_model(r"C:\Ibrahim\Personal\University Stuff\Machine Learning\Project\ML Irrigation Project\models\global_multistep_lstm.keras")

print("lstm model loaded.")

lstm model loaded.


In [17]:
rain_path = r"C:\Ibrahim\Personal\University Stuff\Machine Learning\Project\ML Irrigation Project\data\raw\Rain_fall_in_Pakistan.csv"

rain = pd.read_csv(rain_path)
rain = rain.iloc[1:]  #remove metadata row

rain['date'] = pd.to_datetime(rain['date'], errors='coerce')
rain['rfh'] = pd.to_numeric(rain['rfh'], errors='coerce')

#numeric conversion for all LSTM-required features
FEATURES = ['rfh', 'rfh_avg', 'r1h', 'r3h', 'n_pixels']

for col in FEATURES:
    rain[col] = pd.to_numeric(rain[col], errors='coerce')

#fill missing vals exactly like in LSTM preprocessing
rain[FEATURES] = rain[FEATURES].bfill().ffill()

#keep only the necessary cols
rain_small = rain[['ADM2_PCODE', 'date'] + FEATURES].copy()

#extract year (for matching district-year forecast)
rain_small['Year'] = rain_small['date'].dt.year

#ensure sorted by time
rain_small = rain_small.sort_values(['ADM2_PCODE', 'date']).reset_index(drop=True)

rain_small.head(10)

Unnamed: 0,ADM2_PCODE,date,rfh,rfh_avg,r1h,r3h,n_pixels,Year
0,PK203,1981-01-01,9.0142,7.8681,40.9483,95.7615,2398.0,1981
1,PK203,1981-01-11,13.4554,10.0483,40.9483,95.7615,2398.0,1981
2,PK203,1981-01-21,18.4787,10.3431,40.9483,95.7615,2398.0,1981
3,PK203,1981-02-01,11.6697,8.8762,43.6038,95.7615,2398.0,1981
4,PK203,1981-02-11,7.8745,9.0815,38.0229,95.7615,2398.0,1981
5,PK203,1981-02-21,8.3937,6.7477,27.9379,95.7615,2398.0,1981
6,PK203,1981-03-01,4.8982,7.4553,21.1664,95.7615,2398.0,1981
7,PK203,1981-03-11,19.324,7.6543,32.6159,95.7615,2398.0,1981
8,PK203,1981-03-21,2.653,8.2749,26.8753,95.7615,2398.0,1981
9,PK203,1981-04-01,1.2031,3.3786,23.1802,87.9504,2398.0,1981


In [18]:
df = pd.read_csv(r"C:\Ibrahim\Personal\University Stuff\Machine Learning\Project\ML Irrigation Project\data\processed\final_cleaned_district_dataset.csv")

df.head()

Unnamed: 0,District,Year,Avg_Rainfall,Avg_Temperature,Crop_Yield,Irrigation_Area
0,PK203,1981,3.423858,20.206408,2973.113131,118.704918
1,PK203,1982,6.698594,19.510198,2973.113131,118.704918
2,PK203,1983,3.331681,19.611718,2973.113131,118.704918
3,PK203,1984,2.419928,19.920437,2973.113131,118.704918
4,PK203,1985,2.637431,20.242908,2973.113131,118.704918


In [19]:
#creating forecast function for each district-year
#forecast will based on the last 60 timesteps before dec 31 of each year

SEQ_LEN = 60
FUTURE_STEPS = 24
FEATURES = ['rfh','rfh_avg','r1h','r3h','n_pixels']


def lstm_forecast_24steps(seq):
    """ predicts the next 24 hours using the multivariate LSTM.
        seq shape: (1, 60, 5)
    """
    return model_ms.predict(seq, verbose=0)[0]


def lstm_forecast_7days_fast(seq):
    #FAST VERSION of 7-day forecast: only runs 7 LSTM predictions.
    
    seq7 = seq.copy()
    preds = []

    for _ in range(7):   #7 forecasted days (one-step predictions)
        p = model_ms.predict(seq7, verbose=0)[0][0]
        preds.append(p)

        #shift LEFT and insert new predicted value
        seq7 = np.roll(seq7, -1, axis=1)
        seq7[0, -1, 0] = p      #only replace rainfall channel

    return np.mean(preds)


def make_sequence(values, idx):
    """ extract the 60-timestep window before index idx.
        returns window of shape (1, 60, 5) or None.
    """
    if idx < SEQ_LEN:
        return None
    
    window = values[idx-SEQ_LEN+1 : idx+1]

    if window.shape != (SEQ_LEN, len(FEATURES)):
        return None

    return window.reshape(1, SEQ_LEN, len(FEATURES))


In [20]:
import numpy as np

df['Rain_Next_1']  = np.nan
df['Rain_Next_24'] = np.nan
df['Rain_Next_7d'] = np.nan

districts = df['District'].unique()

for district in districts:

    print(f"\nProcessing district: {district}")

    #rainfall history for this district (using rain_small)
    rdist = rain_small[rain_small['ADM2_PCODE'] == district].copy()
    rdist = rdist.sort_values("date")

    if len(rdist) < SEQ_LEN:
        print("  not enough rainfall history â€” skipping.")
        continue

    #rainfall matrix of shape (N,5)
    values = rdist[FEATURES].values

    years = df[df['District'] == district]['Year'].unique()

    for Y in years:

        #index of last hour BEFORE this year
        idx_series = rdist.index[rdist['Year'] < Y]

        if len(idx_series) == 0:
            continue

        idx = int(idx_series.max())

        #build sequence
        seq = make_sequence(values, idx)
        if seq is None:
            continue

        #24-hr forecast
        pred24 = lstm_forecast_24steps(seq)
        next_1  = pred24[0]
        next_24 = pred24[:24].mean()

        #7-hr forecast(fast) ----
        next_7 = lstm_forecast_7days_fast(seq)

        #write to df
        df.loc[(df['District']==district) & (df['Year']==Y),
               ['Rain_Next_1','Rain_Next_24','Rain_Next_7d']] = [
                   next_1, next_24, next_7
               ]

print("\nforecast merging complete.")



Processing district: PK203

Processing district: PK204

Processing district: PK209

Processing district: PK211

Processing district: PK215

Processing district: PK230

Processing district: PK401

Processing district: PK508

Processing district: PK509

Processing district: PK514

Processing district: PK516

Processing district: PK519

Processing district: PK523

Processing district: PK526

Processing district: PK530

Processing district: PK601

Processing district: PK603

Processing district: PK604

Processing district: PK609

Processing district: PK612

Processing district: PK614

Processing district: PK626

Processing district: PK629

Processing district: PK636

Processing district: PK708

Processing district: PK709

Processing district: PK711

Processing district: PK714

Processing district: PK718

Processing district: PK726

forecast merging complete.


In [23]:
df.head(20)

Unnamed: 0,District,Year,Avg_Rainfall,Avg_Temperature,Crop_Yield,Irrigation_Area,Rain_Next_1,Rain_Next_24,Rain_Next_7d
0,PK203,1981,3.423858,20.206408,2973.113131,118.704918,,,
1,PK203,1982,6.698594,19.510198,2973.113131,118.704918,,,
2,PK203,1983,3.331681,19.611718,2973.113131,118.704918,0.038911,0.008339,0.038079
3,PK203,1984,2.419928,19.920437,2973.113131,118.704918,0.042888,0.008904,-0.012836
4,PK203,1985,2.637431,20.242908,2973.113131,118.704918,0.042122,0.008781,0.035759
5,PK203,1986,2.657394,19.578992,2973.113131,118.704918,0.041946,0.008753,0.041468
6,PK203,1987,2.810428,20.42715,2973.113131,118.704918,0.0423,0.008807,0.042156
7,PK203,1988,3.055322,20.858067,2973.113131,118.704918,0.041686,0.00871,0.038747
8,PK203,1989,3.025525,19.541077,2973.113131,118.704918,0.042265,0.008803,0.028271
9,PK203,1990,3.990772,20.353342,2973.113131,118.704918,0.042073,0.008777,0.041142
