In [1]:
import math
import numpy as np
import pandas as pd
from itertools import permutations

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw observations and discard every row that has a missing value.
data = pd.read_parquet("data/raw/france.parquet").dropna(axis=0, how="any")

# Derive the calendar columns from the index in one vectorised pass instead of
# a per-row Python loop with label lookups.
# NOTE(review): assumes the parquet index holds datetime-like values, exactly as
# the previous per-row `.year` / `.month` access did - confirm.
timestamps = pd.DatetimeIndex(data.index)
data = (
    data
    .assign(
        year=np.asarray(timestamps.year, dtype="int64"),
        month=np.asarray(timestamps.month, dtype="int64"),
    )
    # Replace the timestamp index by a positional 0..n-1 index named "id".
    .reset_index(drop=True)
    .rename_axis("id")
)

# Station metadata, read from a semicolon-separated file.
position = pd.read_csv("data/raw/postesSynop.csv", sep=";")

# Station identifiers as text: any identifier shorter than five characters
# gets a single "0" prepended.
Id = position["ID"].astype(str)
Id = Id.mask(Id.str.len() < 5, "0" + Id)

# Crop production table; the `crop` and `n_dep` columns are used further down.
production = pd.read_parquet("data/raw/franceagrimer-rdts-surfs-multicrops.parquet")

# Remove the two non-numeric department codes first, so that the remaining
# codes can be cast to int.
non_numeric_dep = production["n_dep"].isin(["2A", "2B"])
production = production.drop(production.loc[non_numeric_dep].index)

# Then remove every department whose number is above 95.
dep_above_95 = production["n_dep"].astype(int) > 95
production = production.drop(production.loc[dep_above_95].index)

# Lookup from weather-station id (the values of data["id_sta"]) to a province
# number. Stations absent from this dict are removed from `data`, and the
# province number is what observations are grouped by in cut_year / cut_month.
# Several stations may share one province (e.g. 91 and 10 each appear twice);
# their rows are then averaged together.
# NOTE(review): the values are ints, but they are later joined as text against
# production["n_dep"]; if n_dep is zero-padded ("05"), provinces 5, 6 and 9
# will never match - confirm the n_dep format.
provinces = {7005: 80, 7015: 59, 7020: 50, 7027: 14, 7037: 76, 
             7072: 51, 7110: 29, 7117: 22, 7130: 35, 7139: 61, 
             7149: 91, 7168: 10, 7181: 54, 7190: 67, 7207: 56, 
             7222: 44, 7240: 37, 7255: 18, 7280: 21, 7299: 68, 
             7314: 17, 7335: 86, 7434: 87, 7460: 63, 7471: 43, 
             7481: 69, 7510: 33, 7535: 46, 7558: 12, 7577: 26, 
             7591: 5,  7607: 40, 7621: 65, 7627: 9,  7630: 31, 
             7643: 34, 7650: 13, 7661: 83, 7690: 6,  7747: 66, 
             7761: 91, 67005: 10}

# Stations present in the observations but missing from the province lookup.
unwanted_stations = [sta for sta in data["id_sta"].unique() if sta not in provinces]

# Drop every observation that belongs to one of those stations.
from_unwanted = data["id_sta"].isin(unwanted_stations)
data = data.drop(data.loc[from_unwanted].index)

# Distinct values that the helper functions below iterate over.
years = data["year"].unique()
stations = data["id_sta"].unique()
crops = production["crop"].unique()
n_deps = production["n_dep"].unique()

In [3]:
# Single shared estimator; predict() refits this same instance on every split.
lr = LinearRegression()

In [4]:
def normalize_year(consider_name, normalized_year_dict, frame=None):
    """Min-max scale one column separately for every (station, year) group.

    For each station/year pair that has at least one row, the values of
    ``consider_name`` are rescaled to ``(x - min) / (max - min)`` and stored in
    ``normalized_year_dict`` under the key ``"<station>_<year>"`` as a numpy
    array. A group whose values are all equal has a zero range; it is stored as
    an array of zeros instead of the NaNs that a 0/0 division would produce.

    Parameters
    ----------
    consider_name : str
        Name of the column to normalise.
    normalized_year_dict : dict
        Output mapping, filled in place.
    frame : pandas.DataFrame, optional
        Table with ``id_sta``, ``year`` and ``consider_name`` columns. When
        omitted, the notebook-level ``data``, ``stations`` and ``years`` are
        used.

    Returns
    -------
    None
    """
    if frame is None:
        frame, station_ids, year_values = data, stations, years
    else:
        station_ids = frame["id_sta"].unique()
        year_values = frame["year"].unique()

    for station in station_ids:
        station_data = frame[frame["id_sta"] == station]
        for year in year_values:
            values = station_data.loc[station_data["year"] == year, consider_name]
            if not len(values):
                continue
            low, high = values.min(), values.max()
            span = high - low
            if span == 0:
                # Constant group: avoid 0/0 and report "no variation" as zeros.
                scaled = np.zeros(len(values), dtype=float)
            else:
                scaled = ((values - low) / span).values
            normalized_year_dict[str(station) + "_" + str(year)] = scaled

In [5]:
def normalize_month(consider_name, normalized_month_dict, frame=None):
    """Min-max scale one column separately for every (station, year, month) group.

    For each station/year/month triple that has at least one row, the values of
    ``consider_name`` are rescaled to ``(x - min) / (max - min)`` and stored in
    ``normalized_month_dict`` under the key ``"<station>_<year>_<month>"`` as a
    numpy array. A group whose values are all equal has a zero range; it is
    stored as an array of zeros instead of the NaNs that a 0/0 division would
    produce.

    Parameters
    ----------
    consider_name : str
        Name of the column to normalise.
    normalized_month_dict : dict
        Output mapping, filled in place.
    frame : pandas.DataFrame, optional
        Table with ``id_sta``, ``year``, ``month`` and ``consider_name``
        columns. When omitted, the notebook-level ``data``, ``stations`` and
        ``years`` are used.

    Returns
    -------
    None
    """
    if frame is None:
        frame, station_ids, year_values = data, stations, years
    else:
        station_ids = frame["id_sta"].unique()
        year_values = frame["year"].unique()

    for station in station_ids:
        station_data = frame[frame["id_sta"] == station]
        for year in year_values:
            year_station_data = station_data[station_data["year"] == year]
            for month in range(1, 13):
                in_month = year_station_data["month"] == month
                values = year_station_data.loc[in_month, consider_name]
                if not len(values):
                    continue
                low, high = values.min(), values.max()
                span = high - low
                if span == 0:
                    # Constant group: avoid 0/0 and report "no variation" as zeros.
                    scaled = np.zeros(len(values), dtype=float)
                else:
                    scaled = ((values - low) / span).values
                key = str(station) + "_" + str(year) + "_" + str(month)
                normalized_month_dict[key] = scaled

In [6]:
def read_in_X(consider_names):
    """Extract per-observation features and grouping keys from ``data``.

    Parameters
    ----------
    consider_names : list of str
        Columns of the notebook-level ``data`` frame to use as features.

    Returns
    -------
    data_year : pandas.Series
        Year of every observation, re-indexed 0..n-1.
    data_month : pandas.Series
        Month of every observation, re-indexed 0..n-1.
    data_province : list
        Province of every observation, looked up in ``provinces`` from its
        ``id_sta``.
    data_X : list of list
        One feature row per observation, columns in ``consider_names`` order.

    Notes
    -----
    The year and month Series are given a fresh positional index. ``data``
    keeps its original ``id`` labels after rows are filtered out, and callers
    index these Series with ``series[i]`` for ``i in range(len(series))``;
    with the gappy labels that lookup either raises ``KeyError`` or returns a
    row that is out of step with ``data_province`` and ``data_X``.
    """
    feature_columns = [data[name].values for name in consider_names]
    if feature_columns:
        data_X = [list(row) for row in zip(*feature_columns)]
    else:
        # No feature requested: still return one (empty) row per observation.
        data_X = [[] for _ in range(len(data))]

    data_year = data["year"].reset_index(drop=True)
    data_month = data["month"].reset_index(drop=True)
    data_province = [provinces[station] for station in data["id_sta"].tolist()]

    return data_year, data_month, data_province, data_X

In [7]:
def read_in_X_normalized_year(data_X_normalized_list):
    """Average normalised values per ``"<station>_<year>"`` key.

    ``data_X_normalized_list`` is a list of dicts that share the same keys; the
    keys of the first dict drive the iteration. For every key the year is
    parsed, the station is translated to its province through ``provinces``,
    and one mean (sum divided by length) is computed per dict.

    Returns ``(data_year, data_province, data_X_avg_normalized_year)``, three
    lists of equal length.
    """
    data_year, data_province, data_X_avg_normalized_year = [], [], []

    for key in data_X_normalized_list[0]:
        station, year = key.split("_")
        data_year.append(int(year))
        data_province.append(provinces[int(station)])
        data_X_avg_normalized_year.append(
            [values[key].sum() / len(values[key]) for values in data_X_normalized_list]
        )

    return data_year, data_province, data_X_avg_normalized_year

In [8]:
def read_in_X_normalized_month(data_X_normalized_list):
    """Average normalised values per ``"<station>_<year>_<month>"`` key.

    ``data_X_normalized_list`` is a list of dicts that share the same keys; the
    keys of the first dict drive the iteration. For every key the year and the
    month are parsed, the station is translated to its province through
    ``provinces``, and one mean (sum divided by length) is computed per dict.

    Returns ``(data_year, data_province, data_X_avg_normalized_month)``.
    The parsed months are collected but are not part of the returned tuple.
    """
    data_year, data_month, data_province = [], [], []
    data_X_avg_normalized_month = []

    for key in data_X_normalized_list[0]:
        station, year, month = key.split("_")
        data_year.append(int(year))
        data_month.append(int(month))
        data_province.append(provinces[int(station)])
        data_X_avg_normalized_month.append(
            [values[key].sum() / len(values[key]) for values in data_X_normalized_list]
        )

    return data_year, data_province, data_X_avg_normalized_month

In [16]:
def cut_year(data_year, data_province, data_X, year_avg=None, year_number=None):
    """Average the feature rows of every (province, year) group.

    Rows are summed per key ``"<province>_<year>"`` into ``year_avg`` while
    ``year_number`` counts the rows per key; afterwards every entry of
    ``year_avg`` is divided by its count.

    Parameters
    ----------
    data_year, data_province, data_X : sequences of equal length
        Year, province and feature row of every observation. They are walked
        positionally, so a pandas Series with a non-contiguous index is handled
        correctly (``series[i]`` would be a label lookup there).
    year_avg, year_number : dict, optional
        Accumulators, filled in place. Default to the notebook-level
        ``data_X_year_avg`` and ``data_X_year_number``.

    Notes
    -----
    The final division touches every entry already present in ``year_avg``,
    so start from empty dicts for each run. Input rows are copied, never
    modified.
    """
    if year_avg is None:
        year_avg = data_X_year_avg
    if year_number is None:
        year_number = data_X_year_number

    for year, province, row in zip(data_year, data_province, data_X):
        name = str(province) + "_" + str(year)
        if name in year_avg:
            year_avg[name] = [total + value for total, value in zip(year_avg[name], row)]
            year_number[name] += 1
        else:
            # Copy so that the caller's row is not aliased by the accumulator.
            year_avg[name] = list(row)
            year_number[name] = 1

    for name, totals in year_avg.items():
        count = year_number[name]
        year_avg[name] = [total / count for total in totals]

In [17]:
def cut_month(data_year, data_month, data_province, data_X, month_avg=None, month_number=None):
    """Average the feature rows of every (province, year, month) group.

    Rows are summed per key ``"<province>_<year>_<month>"`` into ``month_avg``
    while ``month_number`` counts the rows per key; afterwards every entry of
    ``month_avg`` is divided by its count.

    Parameters
    ----------
    data_year, data_month, data_province, data_X : sequences of equal length
        Year, month, province and feature row of every observation. They are
        walked positionally, so a pandas Series with a non-contiguous index is
        handled correctly (``series[i]`` would be a label lookup there).
    month_avg, month_number : dict, optional
        Accumulators, filled in place. Default to the notebook-level
        ``data_X_month_avg`` and ``data_X_month_number``.

    Notes
    -----
    The final division touches every entry already present in ``month_avg``,
    so start from empty dicts for each run. Input rows are copied, never
    modified.
    """
    if month_avg is None:
        month_avg = data_X_month_avg
    if month_number is None:
        month_number = data_X_month_number

    for year, month, province, row in zip(data_year, data_month, data_province, data_X):
        name = str(province) + "_" + str(year) + "_" + str(month)
        if name in month_avg:
            month_avg[name] = [total + value for total, value in zip(month_avg[name], row)]
            month_number[name] += 1
        else:
            # Copy so that the caller's row is not aliased by the accumulator.
            month_avg[name] = list(row)
            month_number[name] = 1

    for name, totals in month_avg.items():
        count = month_number[name]
        month_avg[name] = [total / count for total in totals]

In [11]:
def read_in_Y(crop, consider_part):
    """Collect the yearly target values of one crop into ``crops_Y_year``.

    For every department in ``n_deps`` and every year in ``years``, the value
    of the column ``"<consider_part>_<year>"`` is read from the first
    ``production`` row matching the crop and the department, and stored in the
    notebook-level ``crops_Y_year`` under ``"<crop>_<n_dep>_<year>"``.

    Skipped silently:
    * departments that have no row for this crop (previously an IndexError);
    * missing values (NaN is truthy, so it used to be stored as a target);
    * zero / falsy values, as before.

    Parameters
    ----------
    crop : str
        Value of the ``crop`` column to select.
    consider_part : str
        Column-name prefix placed before ``"_<year>"``.

    Raises
    ------
    KeyError
        If ``production`` has no ``"<consider_part>_<year>"`` column for one of
        the years.
    """
    crop_value = production[production["crop"] == crop]

    for n in n_deps:
        crop_n_value = crop_value[crop_value["n_dep"] == n]
        if crop_n_value.empty:
            continue

        for y in years:
            rdt_value = crop_n_value[consider_part + "_" + str(y)].values[0]

            if pd.notna(rdt_value) and rdt_value:
                crops_Y_year[crop + "_" + n + "_" + str(y)] = rdt_value

In [14]:
# Per-observation year, month, province and a single feature column ("rr24").
data_year, data_month, data_province, data_X = read_in_X(["rr24"])

In [18]:
# cut_year() accumulates into these two notebook-level dicts, so they are
# recreated empty here before every run of the cell.
data_X_year_avg = {}
data_X_year_number = {}
cut_year(data_year, data_province, data_X)

In [19]:
# cut_month() accumulates into these two notebook-level dicts, so they are
# recreated empty here before every run of the cell.
data_X_month_avg = {}
data_X_month_number = {}
cut_month(data_year, data_month, data_province, data_X)

In [23]:
# Min-max normalised "rr24" values, keyed by "<station>_<year>".
normalized_year_rr24 = {}
normalize_year("rr24", normalized_year_rr24)

In [25]:
# Min-max normalised "rr24" values, keyed by "<station>_<year>_<month>".
normalize_month_rr24 = {}
normalize_month("rr24", normalize_month_rr24)

In [28]:
# NOTE: this rebinds data_year and data_province, which until now held the
# per-observation values returned by read_in_X(); cells that need those must
# run before this one.
data_year, data_province, data_X_avg_normalized_year = read_in_X_normalized_year([normalized_year_rr24])

In [31]:
# NOTE: this rebinds data_year and data_province once more, now with one entry
# per "<station>_<year>_<month>" key.
data_year, data_province, data_X_avg_normalized_month = read_in_X_normalized_month([normalize_month_rr24])

In [35]:
# Number of station/year/month groups that received normalised values.
len(normalize_month_rr24)

2349

In [12]:
def init_array_year(crop):
    """Pair yearly features with the targets of ``crop``.

    Walks ``data_X_year_avg`` (keys ``"<province>_<year>"``) and keeps every
    entry for which ``crops_Y_year`` holds ``"<crop>_<province>_<year>"``.

    Returns ``(X, Y)`` as numpy arrays with matching row order.
    """
    # NOTE(review): the match is a plain string comparison; a province written
    # "5" will not match a department written "05" - confirm the n_dep format.
    matched = [
        (features, crops_Y_year[crop + "_" + key])
        for key, features in data_X_year_avg.items()
        if crop + "_" + key in crops_Y_year
    ]
    data_X_year_list = [features for features, _ in matched]
    data_Y_list = [target for _, target in matched]

    return np.array(data_X_year_list), np.array(data_Y_list)

In [89]:
def add_power(data_X_array, powers):
    """Expand every feature into its successive integer powers.

    Column ``j`` of each row is replaced by ``x**1, x**2, ..., x**powers[j]``,
    the expansions of consecutive columns being laid out one after the other.

    Parameters
    ----------
    data_X_array : sequence of sequences
        Feature rows.
    powers : sequence of int
        Highest power per column; needs one entry for every column.

    Returns
    -------
    list of list
        The expanded rows.
    """
    return [
        [
            row[col] ** exponent
            for col in range(len(row))
            for exponent in range(1, powers[col] + 1)
        ]
        for row in data_X_array
    ]

In [92]:
# Expand the single yearly feature into its 1st, 2nd and 3rd powers.
# NOTE(review): data_X_year_array is not assigned anywhere in the visible part
# of this notebook (presumably an init_array_year(...) result from a removed
# cell) - confirm that it survives Restart & Run All.
data_X_year_power_array = add_power(data_X_year_array, [3])

In [51]:
def init_array_month(crop, month_wanted):
    """Pair the features of one calendar month with the targets of ``crop``.

    Walks ``data_X_month_avg`` (keys ``"<province>_<year>_<month>"``), keeps
    the entries whose month equals ``month_wanted`` and for which
    ``crops_Y_year`` holds ``"<crop>_<province>_<year>"``.

    Returns ``(X, Y)`` as numpy arrays with matching row order.
    """
    data_X_month_list, data_Y_list = [], []

    for key, features in data_X_month_avg.items():
        province, year, month = key.split("_")
        if int(month) != month_wanted:
            continue
        target_key = crop + "_" + province + "_" + year
        if target_key in crops_Y_year:
            data_X_month_list.append(features)
            data_Y_list.append(crops_Y_year[target_key])

    return np.array(data_X_month_list), np.array(data_Y_list)

In [52]:
# Features for month 1 and targets of crop "OP"; data_Y_array is also the
# array that predict_zero() reads.
data_X_month_array, data_Y_array = init_array_month("OP", 1)

In [86]:
# Displays the whole array; a slice or .shape would keep the output short.
data_X_month_array

array([[4.34435484],
       [1.29032258],
       [1.26553763],
       [5.4391129 ],
       [2.50736559],
       [0.82741935],
       [0.53875   ],
       [3.63991935],
       [4.19314516],
       [1.44711538],
       [1.72416667],
       [4.95895161],
       [3.05892857],
       [1.42379032],
       [1.29637097],
       [2.43467742],
       [5.48387097],
       [1.40604839],
       [1.32580645],
       [3.87177419],
       [3.10564516],
       [1.13145161],
       [0.66370968],
       [2.77597926],
       [5.38475806],
       [2.72862903],
       [4.44193548],
       [5.90766129],
       [2.82557604],
       [2.76733871],
       [1.67587942],
       [3.88790323],
       [1.65967742],
       [1.38954301],
       [2.1561828 ],
       [1.64738095],
       [3.40685484],
       [1.58352535],
       [1.86733871],
       [2.18709677],
       [2.46172235],
       [1.34919355],
       [0.5781682 ],
       [3.19179147],
       [4.4110023 ],
       [0.99758065],
       [0.62741935],
       [2.240

In [91]:
# Preview of the cubic expansion of the monthly feature (result is not stored).
add_power(data_X_month_array, [3])

[[4.344354838709678, 18.87341896462019, 81.99282900194272],
 [1.290322580645161, 1.664932362122788, 2.14829982209392],
 [1.2655376344086022, 1.601585504104521, 2.0268667301675443],
 [5.439112903225807, 29.583949174037464, 160.91043968088363],
 [2.5073655913978494, 6.286882208925888, 15.763512127832074],
 [0.8274193548387097, 0.6846227887617066, 0.5664701461850894],
 [0.53875, 0.29025156249999995, 0.15637302929687497],
 [3.6399193548387094, 13.249012909729446, 48.22533852263214],
 [4.193145161290322, 17.582466343652438, 73.72583367243617],
 [1.4471153846153846, 2.0941429363905324, 3.030466460834376],
 [1.7241666666666666, 2.972750694444444, 5.125517655671296],
 [4.958951612903225, 24.591201099115498, 121.94657635368635],
 [3.058928571428572, 9.357044005102043, 28.622529251321076],
 [1.423790322580645, 2.027178882674297, 2.8862776752915087],
 [1.2963709677419357, 1.680577686004163, 2.1786521211707197],
 [2.4346774193548386, 5.927654136316336, 14.431925675434693],
 [5.483870967741935, 30.

In [57]:
def predict_zero():
    """Relative RMSE of the constant predictor on ``data_Y_array``.

    Every target is predicted by the mean of ``data_Y_array``; the root mean
    squared error of that prediction is divided by the same mean.
    """
    baseline = data_Y_array.mean()
    squared_errors = (baseline - data_Y_array) ** 2
    rmse = math.sqrt(squared_errors.sum() / len(data_Y_array))

    return rmse / data_Y_array.mean()

In [58]:
# Baseline relative RMSE of the mean predictor for the current data_Y_array.
predict_zero()

0.2616310200103802

In [93]:
# Displays every expanded row; a slice would keep the output short.
data_X_year_power_array

[[4.067453161592506, 16.544175221748873, 67.29265781164285],
 [1.9797625570776258, 3.91945978240654, 7.759599721180086],
 [2.0470401174168296, 4.190373242313908, 8.577862133966601],
 [2.25859387197502, 5.101246278523114, 11.52164358410768],
 [2.3440795424314977, 5.494708901245859, 12.880034727026672],
 [2.5642710772833714, 6.575486157792023, 16.861328973503245],
 [1.6831650358773649, 2.833044538000051, 4.768481511445028],
 [1.7090397697540562, 2.920816934600998, 4.991792301404237],
 [1.716499142593324, 2.9463693065236165, 5.057440388411075],
 [2.1443561643835602, 4.598263359729774, 9.860314380895602],
 [2.329010025062657, 5.424287696842359, 12.633220424769885],
 [1.8566158077154906, 3.4470222574590434, 6.399796012745596],
 [1.882691777675702, 3.5445283297276946, 6.673254342116921],
 [2.626300693354265, 6.897455331913094, 18.114791720583433],
 [2.108561838624338, 4.446033027302848, 9.374735574634224],
 [2.4116422716627626, 5.81601844647073, 14.026155938279203],
 [1.8557896281800386, 3.4

In [94]:
def predict(times, X, Y, random_state=None):
    """Average a linear fit over repeated random 80/20 train/test splits.

    The notebook-level estimator ``lr`` is refitted on each split. For each
    split the test RMSE is divided by the mean of the test targets; these
    relative errors, the fitted coefficients and the intercepts are averaged
    over all repetitions.

    Parameters
    ----------
    times : int
        Number of random splits; must be at least 1.
    X : sequence of feature rows
        All rows need the same length as ``X[0]``.
    Y : numpy.ndarray
        Targets, one per row of ``X``.
    random_state : int, optional
        Seed for the splits. With the default ``None`` the splits are drawn
        exactly as before (from numpy's global random state); with an int the
        whole sequence of splits is reproducible.

    Returns
    -------
    tuple
        ``(mean relative RMSE, mean coefficients, mean intercept)``.

    Raises
    ------
    ValueError
        If ``times`` is smaller than 1.
    """
    if times < 1:
        raise ValueError("times must be a positive integer")

    # One generator shared by all repetitions: the splits differ from one
    # another but the sequence as a whole is repeatable.
    rng = None if random_state is None else np.random.RandomState(random_state)

    sum_rRMSE = 0.0
    coef = np.zeros(len(X[0]))
    intercept = 0.0

    for _ in range(times):
        X_train, X_test, y_train, y_test = train_test_split(
            X, Y, test_size=0.2, random_state=rng
        )

        lr.fit(X_train, y_train)
        coef += lr.coef_
        intercept += lr.intercept_

        y_predict = lr.predict(X_test)
        RMSE = math.sqrt(((y_predict - y_test) ** 2).sum() / len(y_test))
        sum_rRMSE += RMSE / y_test.mean()

    return sum_rRMSE / times, coef / times, intercept / times

In [101]:
# NOTE(review): the only visible assignment of data_Y_array comes from
# init_array_month("OP", 1), while the features here derive from
# data_X_year_array - confirm that both describe the same rows, in the same order.
predict(10, data_X_year_power_array, data_Y_array)

(0.2404337440534336,
 array([ 86.6150402, -38.077237 ,   5.2163163]),
 -11.12829956027521)

## rr24

In [13]:
# Sweep settings: d runs over range(start, end) and `times` is handed to
# predict_n (presumably polynomial degree and number of repetitions - confirm).
start = 0
end = 10
times = 10
total_rain = {}

# Result containers, recreated empty for this variable.
# presumably filled by predict_n through notebook-level state - confirm
coeffs = {}
rRMSE_degree_month = {}

# NOTE(review): init_x, init_y, init_list, add_degreed_data and predict_n are
# not defined in the visible part of this notebook; this cell only runs if
# they are still alive in the kernel.
init_x(total_rain, "rr24")

for crop in crops:
    total_rdt = {}
    init_y(total_rain)
    
    for month in range(1, 13):
        temp_data_array, rdt_array = init_list(total_rain, month)

        for d in range(start, end):
            data_array = add_degreed_data(temp_data_array, d)

            # predict_n receives neither data_array, rdt_array nor crop, so it
            # presumably reads them as notebook-level variables - confirm.
            predict_n(times, d, month)

In [54]:
# Keys in the recorded output look like "<crop>_<d>_<month>".
rRMSE_degree_month

{'OP_0_1': 0.2616310200103802,
 'OP_1_1': 0.2514657772536899,
 'OP_2_1': 0.25197984444121324,
 'OP_3_1': 0.25684078276032546,
 'OP_4_1': 0.29411136512608094,
 'OP_5_1': 0.5247400689473605,
 'OP_6_1': 0.3934131393342805,
 'OP_7_1': 0.5115944502717203,
 'OP_8_1': 1.09782090637159,
 'OP_9_1': 5.712819666216058,
 'OP_0_2': 0.2617208144543308,
 'OP_1_2': 0.2643055869099942,
 'OP_2_2': 0.26608297564561967,
 'OP_3_2': 0.27751731861524775,
 'OP_4_2': 0.3546072585162329,
 'OP_5_2': 0.3955298699355534,
 'OP_6_2': 0.6093043569738558,
 'OP_7_2': 1.5759358975064364,
 'OP_8_2': 8.428571155037618,
 'OP_9_2': 22.849395096999068,
 'OP_0_3': 0.2617208144543308,
 'OP_1_3': 0.26260053319573556,
 'OP_2_3': 0.26215234354680683,
 'OP_3_3': 0.31798933339809626,
 'OP_4_3': 1.0703378412748419,
 'OP_5_3': 1.8324110089835484,
 'OP_6_3': 13.831658738669212,
 'OP_7_3': 79.86340617585282,
 'OP_8_3': 379.2542251764764,
 'OP_9_3': 717.4859725238643,
 'OP_0_4': 0.2617208144543308,
 'OP_1_4': 0.2624088373516497,
 'OP_2_

In [55]:
# Containers recreated empty before the search.
# presumably filled by find_best_degree_each_month() - confirm
best_rRMSE_month = {}
best_degree_month = {}

# NOTE(review): find_best_degree_each_month is not defined in the visible part
# of this notebook.
find_best_degree_each_month()
best_rRMSE_month

{'OP_1_1': 0.2514657772536899,
 'OP_0_2': 0.2617208144543308,
 'OP_0_3': 0.2617208144543308,
 'OP_0_4': 0.2617208144543308,
 'OP_0_5': 0.2617208144543308,
 'OP_0_6': 0.2617208144543308,
 'OP_0_7': 0.2617208144543308,
 'OP_2_8': 0.2541899204143865,
 'OP_0_9': 0.2533266395078882,
 'OP_1_10': 0.24391649250229228,
 'OP_2_11': 0.22693155323380484,
 'OP_1_12': 0.2339401427439666,
 'CZH_3_1': 0.1964001924237411,
 'CZH_0_2': 0.19819454892258925,
 'CZH_0_3': 0.19819454892258925,
 'CZH_3_4': 0.19572974640753293,
 'CZH_1_5': 0.19705211314658905,
 'CZH_0_6': 0.19819454892258925,
 'CZH_0_7': 0.19819454892258925,
 'CZH_2_8': 0.19211145404488958,
 'CZH_0_9': 0.2119383618060273,
 'CZH_1_10': 0.21142889120660446,
 'CZH_4_11': 0.19949023417614534,
 'CZH_3_12': 0.2034166430693095,
 'BTH_3_1': 0.25271228852615457,
 'BTH_1_2': 0.2569161113730976,
 'BTH_0_3': 0.25854358755919676,
 'BTH_0_4': 0.25854358755919676,
 'BTH_0_5': 0.25854358755919676,
 'BTH_1_6': 0.25541893791255743,
 'BTH_0_7': 0.2585435875591967

In [56]:
# For every crop, keep the month whose best score is the lowest, together with
# the degree that achieved it. Keys are "<crop>_<month>".
best_predict = {}
best_predict_degree = {}

for crop in crops:
    monthly_best = [
        best_rRMSE_month[crop + "_" + str(best_degree_month[crop + "_" + str(month_no)]) + "_" + str(month_no)]
        for month_no in range(1, 13)
    ]
    lowest = min(monthly_best)
    # list.index returns the first occurrence, so ties go to the earliest month.
    winner = crop + "_" + str(monthly_best.index(lowest) + 1)
    best_predict[winner] = lowest
    best_predict_degree[winner] = best_degree_month[winner]

best_predict

{'OP_11': 0.22693155323380484,
 'CZH_8': 0.19211145404488958,
 'BTH_12': 0.2201461295134616,
 'TS_1': 0.20051435915952773,
 'BTP_11': 0.2714494615364387,
 'BDP_11': 0.21762078724773587,
 'BDH_11': 0.2153566789645981,
 'OH_11': 0.206581289004634,
 'MA_5': 0.20250657883599493}

In [57]:
# Degree selected for each "<crop>_<month>" winner.
best_predict_degree

{'OP_11': 2,
 'CZH_8': 2,
 'BTH_12': 2,
 'TS_1': 3,
 'BTP_11': 2,
 'BDP_11': 2,
 'BDH_11': 2,
 'OH_11': 2,
 'MA_5': 4}

In [58]:
# Fetch the stored coefficients of every winning "<crop>_<degree>_<month>" fit.
best_coeffs = {}

for s in best_predict:
    crop, month = s.split("_")
    coeff_key = "_".join([crop, str(best_predict_degree[s]), str(month)])
    best_coeffs[coeff_key] = coeffs[coeff_key]

best_coeffs

{'OP_2_11': array([ 9.16274117, -0.60301091]),
 'CZH_2_8': array([ 4.43965544, -0.39794613]),
 'BTH_2_12': array([12.46624579, -0.67868152]),
 'TS_3_1': array([ 8.32619647, -1.22777064,  0.05964496]),
 'BTP_2_11': array([13.03359942, -0.85776107]),
 'BDP_2_11': array([ 6.8591342 , -0.43217649]),
 'BDH_2_11': array([ 6.88827383, -0.43406312]),
 'OH_2_11': array([ 8.3571934 , -0.48142785]),
 'MA_4_5': array([-99.58410515,  28.8019574 ,  -3.25513272,   0.12681535])}

## DJ_0

In [59]:
# Same sweep as the rr24 section, for the "DJ_0" variable and with times = 1000.
start = 0
end = 10
times = 1000
total_rad_0 = {}

# Rebinding these discards the results of the previous section.
coeffs = {}
rRMSE_degree_month = {}

# NOTE(review): init_x, init_y, init_list, add_degreed_data and predict_n are
# not defined in the visible part of this notebook.
init_x(total_rad_0, "DJ_0")

for crop in crops:
    total_rdt = {}
    init_y(total_rad_0)
    
    for month in range(1, 13):
        temp_data_array, rdt_array = init_list(total_rad_0, month)

        for d in range(start, end):
            data_array = add_degreed_data(temp_data_array, d)

            predict_n(times, d, month)

In [60]:
# Containers recreated empty before the search; the previous section's values
# are discarded.
best_rRMSE_month = {}
best_degree_month = {}

# NOTE(review): find_best_degree_each_month is not defined in the visible part
# of this notebook.
find_best_degree_each_month()
best_rRMSE_month

{'OP_0_1': 0.2616310200103802,
 'OP_0_2': 0.2617208144543308,
 'OP_1_3': 0.25919622765176176,
 'OP_1_4': 0.25610747144650714,
 'OP_4_5': 0.2577809694843522,
 'OP_0_6': 0.2617208144543308,
 'OP_1_7': 0.2412227062671668,
 'OP_2_8': 0.2570335670410182,
 'OP_5_9': 0.24646036373356997,
 'OP_0_10': 0.2533266395078882,
 'OP_0_11': 0.2533266395078882,
 'OP_3_12': 0.2507919946945341,
 'CZH_0_1': 0.19863737197918252,
 'CZH_0_2': 0.19819454892258925,
 'CZH_1_3': 0.1911813495177803,
 'CZH_1_4': 0.19692133669339637,
 'CZH_2_5': 0.18624674286381265,
 'CZH_0_6': 0.19819454892258925,
 'CZH_1_7': 0.1941364912622429,
 'CZH_0_8': 0.19819454892258925,
 'CZH_1_9': 0.19723244904645587,
 'CZH_1_10': 0.21084713054851517,
 'CZH_1_11': 0.21172160439723386,
 'CZH_0_12': 0.2119383618060273,
 'BTH_0_1': 0.2606952441631043,
 'BTH_0_2': 0.25854358755919676,
 'BTH_1_3': 0.24443022118364563,
 'BTH_6_4': 0.25793404179706586,
 'BTH_4_5': 0.23836750727928346,
 'BTH_0_6': 0.25854358755919676,
 'BTH_1_7': 0.231700605531929

In [61]:
# For every crop, keep the month whose best score is the lowest, together with
# the degree that achieved it. Keys are "<crop>_<month>".
best_predict = {}
best_predict_degree = {}

for crop in crops:
    monthly_best = [
        best_rRMSE_month[crop + "_" + str(best_degree_month[crop + "_" + str(month_no)]) + "_" + str(month_no)]
        for month_no in range(1, 13)
    ]
    lowest = min(monthly_best)
    # list.index returns the first occurrence, so ties go to the earliest month.
    winner = crop + "_" + str(monthly_best.index(lowest) + 1)
    best_predict[winner] = lowest
    best_predict_degree[winner] = best_degree_month[winner]

best_predict

{'OP_7': 0.2412227062671668,
 'CZH_5': 0.18624674286381265,
 'BTH_7': 0.2317006055319292,
 'TS_3': 0.19255257821608102,
 'BTP_7': 0.279623655333199,
 'BDP_9': 0.22971825398098297,
 'BDH_9': 0.2297932031297343,
 'OH_7': 0.21732696271447388,
 'MA_5': 0.20671314265610277}

In [62]:
# Fetch the stored coefficients of every winning "<crop>_<degree>_<month>" fit.
best_coeffs = {}

for s in best_predict:
    crop, month = s.split("_")
    coeff_key = "_".join([crop, str(best_predict_degree[s]), str(month)])
    best_coeffs[coeff_key] = coeffs[coeff_key]

best_coeffs

{'OP_1_7': array([-1.85354376]),
 'CZH_2_5': array([ 4.76383737, -0.17645339]),
 'BTH_1_7': array([-2.67313045]),
 'TS_4_3': array([-1.82159820e+02,  1.78439500e+01, -7.70247279e-01,  1.23156798e-02]),
 'BTP_1_7': array([-2.55637919]),
 'BDP_3_9': array([117.29853055,  -7.25680426,   0.14595582]),
 'BDH_3_9': array([118.98412296,  -7.36376867,   0.14818353]),
 'OH_1_7': array([-1.96595992]),
 'MA_1_5': array([-1.92951291])}

## DJ_6

In [63]:
# Same sweep as the previous sections, for the "DJ_6" variable.
start = 0
end = 10
times = 1000
total_rad_6 = {}

# Rebinding these discards the results of the previous section.
coeffs = {}
rRMSE_degree_month = {}

# NOTE(review): init_x, init_y, init_list, add_degreed_data and predict_n are
# not defined in the visible part of this notebook.
init_x(total_rad_6, "DJ_6")

for crop in crops:
    for month in range(1, 13):
        # NOTE(review): unlike the rr24 and DJ_0 cells, total_rdt is reset and
        # init_y is called once per month here rather than once per crop -
        # confirm which placement is intended.
        total_rdt = {}
        init_y(total_rad_6)
        temp_data_array, rdt_array = init_list(total_rad_6, month)

        for d in range(start, end):
            data_array = add_degreed_data(temp_data_array, d)

            predict_n(times, d, month)

In [64]:
# Containers recreated empty before the search; the previous section's values
# are discarded.
best_rRMSE_month = {}
best_degree_month = {}

# NOTE(review): find_best_degree_each_month is not defined in the visible part
# of this notebook.
find_best_degree_each_month()
best_rRMSE_month

{'OP_1_1': 0.2591063906550345,
 'OP_0_2': 0.2617208144543308,
 'OP_4_3': 0.2546116894178847,
 'OP_2_4': 0.25668942788576954,
 'OP_2_5': 0.25846717029998456,
 'OP_0_6': 0.2617208144543308,
 'OP_1_7': 0.24118850475295264,
 'OP_2_8': 0.2603777236334694,
 'OP_5_9': 0.2466583327131559,
 'OP_0_10': 0.2533266395078882,
 'OP_3_11': 0.25046386881817706,
 'OP_0_12': 0.2533266395078882,
 'CZH_0_1': 0.19863737197918252,
 'CZH_0_2': 0.19819454892258925,
 'CZH_2_3': 0.19083758381397362,
 'CZH_1_4': 0.19517514706682984,
 'CZH_4_5': 0.1895895044465195,
 'CZH_0_6': 0.19819454892258925,
 'CZH_2_7': 0.19474206546272,
 'CZH_0_8': 0.19819454892258925,
 'CZH_5_9': 0.1990569897703025,
 'CZH_1_10': 0.20798756795103718,
 'CZH_3_11': 0.20715890134206016,
 'CZH_0_12': 0.2119383618060273,
 'BTH_1_1': 0.2504713473236602,
 'BTH_0_2': 0.25854358755919676,
 'BTH_4_3': 0.24004554420867516,
 'BTH_1_4': 0.2534356423801448,
 'BTH_4_5': 0.24437311671115453,
 'BTH_0_6': 0.25854358755919676,
 'BTH_2_7': 0.23172515154266965,

In [65]:
# For every crop, keep the month whose best score is the lowest, together with
# the degree that achieved it. Keys are "<crop>_<month>".
best_predict = {}
best_predict_degree = {}

for crop in crops:
    monthly_best = [
        best_rRMSE_month[crop + "_" + str(best_degree_month[crop + "_" + str(month_no)]) + "_" + str(month_no)]
        for month_no in range(1, 13)
    ]
    lowest = min(monthly_best)
    # list.index returns the first occurrence, so ties go to the earliest month.
    winner = crop + "_" + str(monthly_best.index(lowest) + 1)
    best_predict[winner] = lowest
    best_predict_degree[winner] = best_degree_month[winner]

best_predict

{'OP_7': 0.24118850475295264,
 'CZH_5': 0.1895895044465195,
 'BTH_7': 0.23172515154266965,
 'TS_3': 0.1903771572018439,
 'BTP_7': 0.2830534788338335,
 'BDP_11': 0.22626203347209944,
 'BDH_11': 0.22368607246687944,
 'OH_3': 0.21044173566867846,
 'MA_12': 0.20257598934174814}

In [66]:
# Fetch the stored coefficients of every winning "<crop>_<degree>_<month>" fit.
best_coeffs = {}

for s in best_predict:
    crop, month = s.split("_")
    coeff_key = "_".join([crop, str(best_predict_degree[s]), str(month)])
    best_coeffs[coeff_key] = coeffs[coeff_key]

best_coeffs

{'OP_1_7': array([-1.84175242]),
 'CZH_4_5': array([-6.79030522e+01,  7.32076668e+00, -3.34316516e-01,  5.45280028e-03]),
 'BTH_2_7': array([-7.9119586 ,  0.20408508]),
 'TS_1_3': array([-0.62472851]),
 'BTP_2_7': array([-7.64078749,  0.20351307]),
 'BDP_2_11': array([ 2.02873412, -0.12731567]),
 'BDH_2_11': array([ 2.0115419 , -0.12646696]),
 'OH_4_3': array([ 8.52866499e+01, -1.37703284e+01,  9.14506294e-01, -2.15518524e-02]),
 'MA_2_12': array([-4.32593385,  0.18938979])}