In [2]:
import math
import numpy as np
import pandas as pd
from itertools import permutations

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [2]:
# --- Weather observations ----------------------------------------------------
# Load the raw observations and discard every row that contains a missing value.
data = pd.read_parquet("data/raw/france.parquet").dropna(axis=0, how="any")

# Derive the calendar year and month of every observation from the timestamp
# index in one vectorised step (instead of one label lookup per row in a loop).
timestamps = pd.DatetimeIndex(data.index)
all_years = timestamps.year.tolist()
all_month = timestamps.month.tolist()
data["year"] = all_years
data["month"] = all_month

# Replace the timestamp index by a running integer index named "id".
data["id"] = range(len(data))
data = data.set_index("id")

# --- Station metadata --------------------------------------------------------
position = pd.read_csv("data/raw/postesSynop.csv", sep=";")

# Station identifiers as strings, left-padded with zeros to 5 characters
# (e.g. "7005" -> "07005"). For 4-character ids this matches prepending a single
# "0"; shorter ids are now padded to the full width as well.
Id = position["ID"].astype(str).str.zfill(5)

# --- Crop production ---------------------------------------------------------
production = pd.read_parquet("data/raw/franceagrimer-rdts-surfs-multicrops.parquet")
# Remove the non-numeric codes "2A"/"2B" first so that the remaining codes can be
# cast to int, then keep only department numbers up to 95.
production = production[~production["n_dep"].isin(["2A", "2B"])]
production = production[production["n_dep"].astype(int) <= 95]

# Station id (as found in data["id_sta"]) -> department number.
# NOTE(review): 7149 and 7761 both map to 91, and 7168 and 67005 both map to 10;
# confirm that these duplicates are intended.
provinces = {7005: 80, 7015: 59, 7020: 50, 7027: 14, 7037: 76, 
             7072: 51, 7110: 29, 7117: 22, 7130: 35, 7139: 61, 
             7149: 91, 7168: 10, 7181: 54, 7190: 67, 7207: 56, 
             7222: 44, 7240: 37, 7255: 18, 7280: 21, 7299: 68, 
             7314: 17, 7335: 86, 7434: 87, 7460: 63, 7471: 43, 
             7481: 69, 7510: 33, 7535: 46, 7558: 12, 7577: 26, 
             7591: 5,  7607: 40, 7621: 65, 7627: 9,  7630: 31, 
             7643: 34, 7650: 13, 7661: 83, 7690: 6,  7747: 66, 
             7761: 91, 67005: 10}

# Drop every observation whose station has no entry in `provinces`, with a single
# boolean mask instead of one DataFrame.drop call per unwanted station.
stations = data["id_sta"].unique()
unwanted_stations = [station for station in stations if station not in provinces]
data = data[~data["id_sta"].isin(unwanted_stations)]

# Lookup axes used by the cells below (computed after filtering).
years = data["year"].unique()
stations = data["id_sta"].unique()
crops = production["crop"].unique()
n_deps = production["n_dep"].unique()

# Year, Original — yearly averages of the raw (un-normalized) features per province

In [3]:
def read_in_X(consider_names):
    """Collect per-row features and grouping labels from the global ``data`` frame.

    Args:
        consider_names: Iterable of column names of ``data`` to use as features,
            in the order they should appear in each feature row.

    Returns:
        A tuple ``(data_year, data_month, data_province, data_X)``:
        the ``year`` and ``month`` columns as lists, the province of every row
        (``id_sta`` looked up in the global ``provinces`` dict), and a list with
        one inner list per row holding the values of the requested columns.

    Raises:
        KeyError: If a requested column does not exist or a station id has no
            entry in ``provinces``.
    """
    # Fetch each requested column once, then assemble the rows position by position.
    feature_columns = [data[name].values for name in consider_names]
    data_X = [[column[row] for column in feature_columns] for row in range(len(data))]

    data_year = data["year"].tolist()
    data_month = data["month"].tolist()
    data_province = [provinces[station] for station in data["id_sta"].tolist()]

    return data_year, data_month, data_province, data_X

In [4]:
# Extract the "rr24" column as single-element feature rows, together with the
# year, month and province label of every row of `data`.
data_year, data_month, data_province, data_rr24 = read_in_X(["rr24"])

In [5]:
# Inspect the feature rows (one single-element list per row of `data`).
# NOTE(review): this prints the whole list; data_rr24[:5] would keep the output short.
data_rr24

[[2.857142857142857],
 [4.925],
 [2.175],
 [0.775],
 [0.0875],
 [1.575],
 [0.85],
 [1.05],
 [32.7375],
 [10.537500000000001],
 [9.15],
 [20.2],
 [8.075],
 [15.950000000000001],
 [18.7125],
 [2.6625],
 [1.5250000000000001],
 [6.675],
 [18.05],
 [2.775],
 [0.07499999999999998],
 [-0.0625],
 [0.325],
 [0.05],
 [-0.0625],
 [0.1625],
 [0.1625],
 [1.375],
 [0.35000000000000003],
 [0.95],
 [0.3],
 [0.525],
 [8.425],
 [0.07500000000000001],
 [0.75],
 [0.6499999999999999],
 [0.325],
 [0.8624999999999999],
 [0.21250000000000002],
 [0.75],
 [0.0],
 [0.4],
 [0.4],
 [0.27142857142857146],
 [0.275],
 [0.037500000000000006],
 [0.0],
 [0.25],
 [0.325],
 [3.65],
 [1.55],
 [17.4625],
 [7.7125],
 [5.15],
 [0.7625],
 [-0.0625],
 [0.5125],
 [0.125],
 [0.625],
 [0.0625],
 [0.15000000000000002],
 [0.05],
 [-0.025],
 [0.125],
 [3.3],
 [11.1625],
 [2.2375],
 [0.17500000000000002],
 [0.7999999999999999],
 [0.3375],
 [0.8999999999999999],
 [13.225],
 [14.825],
 [2.475],
 [-0.06000000000000001],
 [1.825],
 [0.275

In [6]:
def avg_year(data_year, data_province, data_X):
    """Average feature rows per province and year into the module-level dicts.

    Rows are grouped under the key ``"<province>_<year>"``. Sums are accumulated
    in the global ``data_X_year_avg`` and row counts in the global
    ``data_X_year_number``; afterwards every entry of ``data_X_year_avg`` is
    divided by its count. Both dicts are expected to be reset by the caller
    before each call, because every stored entry is divided again at the end.

    Args:
        data_year: Year of each row.
        data_province: Province of each row (same length as ``data_year``).
        data_X: Feature rows; each row must support ``.copy()`` and indexing.

    Returns:
        None. The results are left in the two global dicts.
    """
    for idx in range(len(data_year)):
        key = "_".join((str(data_province[idx]), str(data_year[idx])))
        row = data_X[idx]

        if key not in data_X_year_avg:
            # First row of the group: start from a copy so the input stays untouched.
            data_X_year_avg[key] = row.copy()
            data_X_year_number[key] = 1
            continue

        totals = data_X_year_avg[key]
        for col in range(len(row)):
            totals[col] += row[col]
        data_X_year_number[key] += 1

    # Turn every accumulated sum into a mean.
    for key, totals in data_X_year_avg.items():
        for col in range(len(totals)):
            totals[col] /= data_X_year_number[key]

In [7]:
# avg_year() accumulates into these two module-level dicts and divides every
# stored entry at the end, so both must be reset right before each call.
data_X_year_avg = {}
data_X_year_number = {}
avg_year(data_year, data_province, data_rr24)
# Shallow snapshot: the inner lists are still shared with data_X_year_avg.
data_rr24_year_avg = data_X_year_avg.copy()

In [8]:
# Inspect the yearly averages, keyed by "<province>_<year>".
data_rr24_year_avg

{'80_2017': [4.067453161592506],
 '80_2018': [1.9797625570776258],
 '80_2019': [2.0470401174168296],
 '80_2020': [2.25859387197502],
 '80_2021': [2.3440795424314977],
 '80_2022': [1.3862786023500318],
 '59_2017': [2.5642710772833714],
 '59_2018': [1.6831650358773649],
 '59_2019': [1.7090397697540562],
 '59_2020': [1.716499142593324],
 '59_2021': [2.1443561643835602],
 '59_2022': [0.9668135435992573],
 '50_2017': [2.329010025062657],
 '50_2018': [1.8566158077154906],
 '50_2019': [1.882691777675702],
 '50_2020': [2.626300693354265],
 '50_2021': [2.108561838624338],
 '50_2022': [1.2050633889919604],
 '14_2017': [2.4116422716627626],
 '14_2018': [1.8557896281800386],
 '14_2019': [2.1290900195694724],
 '14_2020': [2.0660873666406445],
 '14_2021': [1.8229606481481493],
 '14_2022': [1.0028679653679655],
 '76_2017': [3.171734553325463],
 '76_2018': [2.1736149267399245],
 '76_2019': [2.2568982387475556],
 '76_2020': [2.10769041041831],
 '76_2021': [2.654618395303329],
 '76_2022': [0.89407158317

In [9]:
def read_in_Y(crop, consider_part):
    """Collect one crop's yearly values per department into ``crops_Y_year``.

    For every department in the global ``n_deps`` and every year in the global
    ``years``, the value of column ``"<consider_part>_<year>"`` is read from the
    first row of the global ``production`` frame that matches the crop and the
    department. It is stored in the global dict ``crops_Y_year`` under the key
    ``"<crop>_<n_dep>_<year>"``.

    Skipped without an entry:
      * departments that have no row for ``crop`` (this used to raise IndexError);
      * missing values (None / NaN; NaN used to be stored because it is truthy);
      * falsy values such as 0, as before.

    Args:
        crop: Value of the ``crop`` column to select.
        consider_part: Column-name prefix, e.g. ``"rdt"`` for ``"rdt_2017"``.

    Returns:
        None. The results are left in ``crops_Y_year``.

    Raises:
        KeyError: If ``production`` has no column for one of the years.
    """
    crop_rows = production[production["crop"] == crop]

    for n in n_deps:
        dep_rows = crop_rows[crop_rows["n_dep"] == n]
        if dep_rows.empty:
            # `n_deps` covers all crops, so a department may lack this crop entirely.
            continue

        for y in years:
            rdt_value = dep_rows[consider_part + "_" + str(y)].values[0]

            # Check for missing values first: NaN is truthy and would slip
            # through a plain truthiness test.
            if pd.isna(rdt_value) or not rdt_value:
                continue

            crops_Y_year[crop + "_" + n + "_" + str(y)] = rdt_value

In [10]:
# read_in_Y() writes into the module-level dict `crops_Y_year`, so reset it
# before the call to avoid keeping entries from an earlier run.
crops_Y_year = {}
read_in_Y("OP", "rdt")
# Snapshot of the "rdt" values for crop "OP", keyed by "<crop>_<n_dep>_<year>".
crops_rdt_year = crops_Y_year.copy()

In [11]:
# Inspect the collected "rdt" values, keyed by "<crop>_<n_dep>_<year>".
crops_rdt_year

{'OP_77_2017': 72.0,
 'OP_77_2018': 68.0,
 'OP_77_2019': 79.0,
 'OP_77_2020': 53.0,
 'OP_77_2021': 70.0,
 'OP_78_2017': 65.0,
 'OP_78_2018': 62.0,
 'OP_78_2019': 78.0,
 'OP_78_2020': 51.0,
 'OP_78_2021': 67.0,
 'OP_91_2017': 68.0,
 'OP_91_2018': 69.0,
 'OP_91_2019': 78.0,
 'OP_91_2020': 54.0,
 'OP_91_2021': 73.0,
 'OP_93_2017': 69.0,
 'OP_93_2019': 78.0,
 'OP_93_2021': 64.0,
 'OP_94_2017': 68.0,
 'OP_94_2018': 69.0,
 'OP_94_2019': 78.0,
 'OP_94_2020': 54.0,
 'OP_94_2021': 73.0,
 'OP_95_2017': 69.0,
 'OP_95_2018': 58.0,
 'OP_95_2019': 78.0,
 'OP_95_2020': 50.0,
 'OP_95_2021': 64.0,
 'OP_18_2017': 54.0,
 'OP_18_2018': 53.0,
 'OP_18_2019': 64.0,
 'OP_18_2020': 38.0,
 'OP_18_2021': 53.0,
 'OP_28_2017': 73.0,
 'OP_28_2018': 66.0,
 'OP_28_2019': 82.0,
 'OP_28_2020': 53.0,
 'OP_28_2021': 71.0,
 'OP_36_2017': 57.5,
 'OP_36_2018': 54.0,
 'OP_36_2019': 67.0,
 'OP_36_2020': 40.0,
 'OP_36_2021': 66.0,
 'OP_37_2017': 55.0,
 'OP_37_2018': 47.0,
 'OP_37_2019': 67.5,
 'OP_37_2020': 45.0,
 'OP_37_2021'

In [12]:
def init_array_year(crop):
    """Pair yearly feature averages with the matching crop values.

    Walks the global ``data_X_year_avg`` (keys ``"<province>_<year>"``) in
    insertion order and keeps every entry for which the global ``crops_Y_year``
    has the key ``"<crop>_<province>_<year>"``.

    Args:
        crop: Crop code used as the key prefix in ``crops_Y_year``.

    Returns:
        A tuple ``(X, Y)`` of numpy arrays built from the kept feature rows and
        their crop values, in matching order.
    """
    prefix = crop + "_"
    kept_features, kept_targets = [], []

    for key, averaged in data_X_year_avg.items():
        target_key = prefix + key
        if target_key not in crops_Y_year:
            continue
        kept_features.append(averaged)
        kept_targets.append(crops_Y_year[target_key])

    return np.array(kept_features), np.array(kept_targets)

In [13]:
# Build aligned X / Y arrays for crop "OP". init_array_year() reads the globals
# data_X_year_avg and crops_Y_year, not the snapshot copies taken above.
data_rr24_year_avg_array, data_rdt_array = init_array_year("OP")

In [14]:
# Inspect the yearly feature matrix (one row per matched province/year key).
data_rr24_year_avg_array

array([[4.06745316],
       [1.97976256],
       [2.04704012],
       [2.25859387],
       [2.34407954],
       [2.56427108],
       [1.68316504],
       [1.70903977],
       [1.71649914],
       [2.14435616],
       [2.32901003],
       [1.85661581],
       [1.88269178],
       [2.62630069],
       [2.10856184],
       [2.41164227],
       [1.85578963],
       [2.12909002],
       [2.06608737],
       [1.82296065],
       [3.17173455],
       [2.17361493],
       [2.25689824],
       [2.10769041],
       [2.6546184 ],
       [2.11425397],
       [1.33113503],
       [1.53692433],
       [1.51408399],
       [1.87773386],
       [3.35957672],
       [3.2250199 ],
       [3.72285024],
       [3.72249707],
       [2.85594227],
       [2.72645492],
       [2.21804012],
       [2.69148742],
       [2.19557912],
       [2.15585596],
       [2.11194412],
       [1.8391342 ],
       [1.96471088],
       [2.07701171],
       [1.82156008],
       [2.05121487],
       [2.19501771],
       [2.129

In [15]:
def add_power(data_X_array, powers):
    """Expand every feature into its successive integer powers.

    Args:
        data_X_array: Sequence of feature rows (list of lists or 2-D array).
        powers: ``powers[j]`` is the highest exponent generated for column ``j``;
            it needs one entry per column.

    Returns:
        A list of lists. Each output row contains, column by column,
        ``x ** 1, x ** 2, ..., x ** powers[j]``.
    """
    def _expand(row):
        # Columns stay in order; within a column the exponents ascend from 1.
        return [
            row[col] ** exponent
            for col in range(len(row))
            for exponent in range(1, powers[col] + 1)
        ]

    return [_expand(data_X_array[idx]) for idx in range(len(data_X_array))]

In [16]:
# Expand the single yearly feature into x, x**2 and x**3 (result is a list of lists).
data_rr24_year_avg_power_array = add_power(data_rr24_year_avg_array, [3])

In [17]:
# Inspect the expanded yearly features: each row is [x, x**2, x**3].
data_rr24_year_avg_power_array

[[4.067453161592506, 16.544175221748873, 67.29265781164285],
 [1.9797625570776258, 3.91945978240654, 7.759599721180086],
 [2.0470401174168296, 4.190373242313908, 8.577862133966601],
 [2.25859387197502, 5.101246278523114, 11.52164358410768],
 [2.3440795424314977, 5.494708901245859, 12.880034727026672],
 [2.5642710772833714, 6.575486157792023, 16.861328973503245],
 [1.6831650358773649, 2.833044538000051, 4.768481511445028],
 [1.7090397697540562, 2.920816934600998, 4.991792301404237],
 [1.716499142593324, 2.9463693065236165, 5.057440388411075],
 [2.1443561643835602, 4.598263359729774, 9.860314380895602],
 [2.329010025062657, 5.424287696842359, 12.633220424769885],
 [1.8566158077154906, 3.4470222574590434, 6.399796012745596],
 [1.882691777675702, 3.5445283297276946, 6.673254342116921],
 [2.626300693354265, 6.897455331913094, 18.114791720583433],
 [2.108561838624338, 4.446033027302848, 9.374735574634224],
 [2.4116422716627626, 5.81601844647073, 14.026155938279203],
 [1.8557896281800386, 3.4

# Month, Original — monthly averages of the raw (un-normalized) features per province

In [18]:
def avg_month(data_year, data_month, data_province, data_X):
    """Average feature rows per province, year and month into the module-level dicts.

    Rows are grouped under the key ``"<province>_<year>_<month>"``. Sums are
    accumulated in the global ``data_X_month_avg`` and row counts in the global
    ``data_X_month_number``; afterwards every entry of ``data_X_month_avg`` is
    divided by its count. Both dicts are expected to be reset by the caller
    before each call, because every stored entry is divided again at the end.

    Args:
        data_year: Year of each row.
        data_month: Month of each row.
        data_province: Province of each row.
        data_X: Feature rows; each row must support ``.copy()`` and indexing.

    Returns:
        None. The results are left in the two global dicts.
    """
    for idx in range(len(data_year)):
        key = "_".join(
            (str(data_province[idx]), str(data_year[idx]), str(data_month[idx]))
        )
        row = data_X[idx]

        if key not in data_X_month_avg:
            # First row of the group: start from a copy so the input stays untouched.
            data_X_month_avg[key] = row.copy()
            data_X_month_number[key] = 1
            continue

        totals = data_X_month_avg[key]
        for col in range(len(row)):
            totals[col] += row[col]
        data_X_month_number[key] += 1

    # Turn every accumulated sum into a mean.
    for key, totals in data_X_month_avg.items():
        for col in range(len(totals)):
            totals[col] /= data_X_month_number[key]

In [19]:
# avg_month() accumulates into these two module-level dicts and divides every
# stored entry at the end, so both must be reset right before each call.
data_X_month_avg = {}
data_X_month_number = {}
avg_month(data_year, data_month, data_province, data_rr24)
# Shallow snapshot: the inner lists are still shared with data_X_month_avg.
data_rr24_month_avg = data_X_month_avg.copy()

In [20]:
# Inspect the monthly averages, keyed by "<province>_<year>_<month>".
data_rr24_month_avg

{'80_2017_9': [5.488988095238096],
 '80_2017_10': [1.6946428571428571],
 '80_2017_11': [4.41645238095238],
 '80_2017_12': [4.72684331797235],
 '80_2018_1': [4.344354838709678],
 '80_2018_2': [1.2901785714285714],
 '80_2018_3': [1.8590322580645158],
 '80_2018_4': [2.0504166666666674],
 '80_2018_5': [1.9919354838709675],
 '80_2018_6': [0.35250000000000004],
 '80_2018_7': [0.8965053763440859],
 '80_2018_8': [3.162231182795698],
 '80_2018_9': [1.334166666666667],
 '80_2018_10': [1.0754032258064516],
 '80_2018_11': [1.4241666666666668],
 '80_2018_12': [3.8205645161290325],
 '80_2019_1': [1.290322580645161],
 '80_2019_2': [1.0995535714285716],
 '80_2019_3': [2.016532258064516],
 '80_2019_4': [1.3174999999999997],
 '80_2019_5': [0.7598502304147464],
 '80_2019_6': [2.0200595238095236],
 '80_2019_7': [1.4754032258064516],
 '80_2019_8': [1.3790322580645167],
 '80_2019_9': [1.7095833333333335],
 '80_2019_10': [4.447983870967742],
 '80_2019_11': [3.6262499999999998],
 '80_2019_12': [3.346370967741

In [21]:
def init_array_month(crop, month_wanted):
    """Pair the feature averages of one calendar month with the crop values.

    Walks the global ``data_X_month_avg`` (keys ``"<province>_<year>_<month>"``)
    in insertion order and keeps the entries whose month equals ``month_wanted``
    and for which the global ``crops_Y_year`` has the key
    ``"<crop>_<province>_<year>"``.

    Args:
        crop: Crop code used as the key prefix in ``crops_Y_year``.
        month_wanted: Month number to select (compared with ``int(month)``).

    Returns:
        A tuple ``(X, Y)`` of numpy arrays built from the kept feature rows and
        their crop values, in matching order.
    """
    kept_features, kept_targets = [], []

    for key, averaged in data_X_month_avg.items():
        province, year, month = key.split("_")
        if int(month) != month_wanted:
            continue

        target_key = crop + "_" + province + "_" + year
        if target_key in crops_Y_year:
            kept_features.append(averaged)
            kept_targets.append(crops_Y_year[target_key])

    return np.array(kept_features), np.array(kept_targets)

In [22]:
# Build aligned X / Y arrays for crop "OP" using month 1 only.
# NOTE(review): this overwrites `data_rdt_array` produced by init_array_year("OP").
data_rr24_month_avg_array, data_rdt_array = init_array_month("OP", 1)

In [23]:
# Inspect the month-1 feature matrix (one row per matched province/year).
data_rr24_month_avg_array

array([[4.34435484],
       [1.29032258],
       [1.26553763],
       [5.4391129 ],
       [2.50736559],
       [0.82741935],
       [0.53875   ],
       [3.63991935],
       [4.19314516],
       [1.44711538],
       [1.72416667],
       [4.95895161],
       [3.05892857],
       [1.42379032],
       [1.29637097],
       [2.43467742],
       [5.48387097],
       [1.40604839],
       [1.32580645],
       [3.87177419],
       [3.10564516],
       [1.13145161],
       [0.66370968],
       [2.77597926],
       [5.38475806],
       [2.72862903],
       [4.44193548],
       [5.90766129],
       [2.82557604],
       [2.76733871],
       [1.67587942],
       [3.88790323],
       [1.65967742],
       [1.38954301],
       [2.1561828 ],
       [1.64738095],
       [3.40685484],
       [1.58352535],
       [1.86733871],
       [2.18709677],
       [2.46172235],
       [1.34919355],
       [0.5781682 ],
       [3.19179147],
       [4.4110023 ],
       [0.99758065],
       [0.62741935],
       [2.240

In [24]:
# Expand the single monthly feature into x, x**2 and x**3 (result is a list of lists).
data_rr24_month_avg_power_array = add_power(data_rr24_month_avg_array, [3])

In [25]:
# Inspect the expanded monthly features: each row is [x, x**2, x**3].
data_rr24_month_avg_power_array

[[4.344354838709678, 18.87341896462019, 81.99282900194272],
 [1.290322580645161, 1.664932362122788, 2.14829982209392],
 [1.2655376344086022, 1.601585504104521, 2.0268667301675443],
 [5.439112903225807, 29.583949174037464, 160.91043968088363],
 [2.5073655913978494, 6.286882208925888, 15.763512127832074],
 [0.8274193548387097, 0.6846227887617066, 0.5664701461850894],
 [0.53875, 0.29025156249999995, 0.15637302929687497],
 [3.6399193548387094, 13.249012909729446, 48.22533852263214],
 [4.193145161290322, 17.582466343652438, 73.72583367243617],
 [1.4471153846153846, 2.0941429363905324, 3.030466460834376],
 [1.7241666666666666, 2.972750694444444, 5.125517655671296],
 [4.958951612903225, 24.591201099115498, 121.94657635368635],
 [3.058928571428572, 9.357044005102043, 28.622529251321076],
 [1.423790322580645, 2.027178882674297, 2.8862776752915087],
 [1.2963709677419357, 1.680577686004163, 2.1786521211707197],
 [2.4346774193548386, 5.927654136316336, 14.431925675434693],
 [5.483870967741935, 30.

# Year, Normalized — min-max normalization per station and year, then averaged

In [26]:
def normalize_year(consider_name):
    """Min-max normalize one column separately for every station and year.

    Iterates over the global ``stations`` and ``years``; combinations without
    rows in the global ``data`` frame are skipped. Within each group the values
    are scaled to ``(x - min) / (max - min)``. When the spread is not larger
    than ``1.0e-15`` the group is replaced by an array of ones of the same length.

    Args:
        consider_name: Name of the column of ``data`` to normalize.

    Returns:
        A tuple ``(data_year, data_province, data_X_normalized_year)`` of three
        parallel lists with one entry per non-empty (station, year) group: the
        year, the station's province from the global ``provinces`` dict, and the
        numpy array of normalized values.
    """
    data_year, data_province, data_X_normalized_year = [], [], []

    for station in stations:
        rows_of_station = data[data["id_sta"] == station]

        for year in years:
            rows = rows_of_station[rows_of_station["year"] == year]
            if len(rows) == 0:
                continue

            values = rows[consider_name]
            lowest, highest = min(values), max(values)
            spread = highest - lowest

            data_year.append(year)
            data_province.append(provinces[station])

            if spread > 1.0e-15:
                scaled = ((values - lowest) / spread).values
            else:
                # (Nearly) constant group: avoid dividing by zero.
                scaled = np.ones(len(values), dtype=int)
            data_X_normalized_year.append(scaled)

    return data_year, data_province, data_X_normalized_year

In [27]:
# Normalize "rr24" per station and year.
# NOTE(review): this rebinds `data_year` and `data_province`, which until now held
# the per-row labels returned by read_in_X(); re-running the earlier avg_year() /
# avg_month() cells after this one would pair them with `data_rr24` incorrectly.
data_year, data_province, data_rr24_normalized_year = normalize_year("rr24")

In [28]:
# Inspect the normalized values: one numpy array per (station, year) group.
data_rr24_normalized_year

[array([8.90135017e-02, 1.52057927e-01, 6.82164634e-02, 2.55335366e-02,
        4.57317073e-03, 4.99237805e-02, 2.78201220e-02, 3.39176829e-02,
        1.00000000e+00, 3.23170732e-01, 2.80868902e-01, 6.17759146e-01,
        2.48094512e-01, 4.88185976e-01, 5.72408537e-01, 8.30792683e-02,
        4.83993902e-02, 2.05411585e-01, 5.52210366e-01, 8.65091463e-02,
        4.19207317e-03, 0.00000000e+00, 1.18140244e-02, 3.42987805e-03,
        0.00000000e+00, 6.85975610e-03, 6.85975610e-03, 4.38262195e-02,
        1.25762195e-02, 3.08689024e-02, 1.10518293e-02, 1.79115854e-02,
        2.58765244e-01, 4.19207317e-03, 2.47713415e-02, 2.17225610e-02,
        1.18140244e-02, 2.82012195e-02, 8.38414634e-03, 2.47713415e-02,
        1.90548780e-03, 1.41006098e-02, 1.41006098e-02, 1.01807491e-02,
        1.02896341e-02, 3.04878049e-03, 1.90548780e-03, 9.52743902e-03,
        1.18140244e-02, 1.13185976e-01, 4.91615854e-02, 5.34298780e-01,
        2.37042683e-01, 1.58917683e-01, 2.51524390e-02, 0.000000

In [29]:
def avg_X_normalized(data_X_normalized):
    """Return the mean of every array in ``data_X_normalized``.

    Args:
        data_X_normalized: Iterable of array-likes that provide ``.sum()`` and
            ``len()``, e.g. numpy arrays.

    Returns:
        A list with ``group.sum() / len(group)`` for every group, in input order.
    """
    return [group.sum() / len(group) for group in data_X_normalized]

In [30]:
# Collapse every (station, year) group of normalized values to its mean.
data_rr24_normalized_year_avg = avg_X_normalized(data_rr24_normalized_year)

In [31]:
# Inspect the per-group means of the normalized values.
data_rr24_normalized_year_avg

[0.125913206146113,
 0.09606293566178407,
 0.08876651648234955,
 0.06357395881334284,
 0.07655691597279554,
 0.07977150756969045,
 0.13804513353799855,
 0.05699288968045912,
 0.10106367428793604,
 0.09048563599468619,
 0.079693072858715,
 0.08358969979230228,
 0.08762426207375308,
 0.06565824858105673,
 0.06914356678903924,
 0.07212435696704406,
 0.05638156196724876,
 0.08847887382996343,
 0.15641900268362408,
 0.03838645001334719,
 0.08924368473354692,
 0.11318549270493246,
 0.06764183184440721,
 0.0805013113407274,
 0.15872575152579563,
 0.07639193369978753,
 0.09086836583123101,
 0.10531617938787412,
 0.09515089448370732,
 0.05259638006236645,
 0.07162972804703496,
 0.09493433030542006,
 0.05589156914644084,
 0.0594232487697743,
 0.04158178933373792,
 0.04661803929862297,
 0.21030861517183702,
 0.08837262181075044,
 0.14099954769127873,
 0.1160530420523577,
 0.10837551860893523,
 0.07066460521291452,
 0.13169271604113159,
 0.061146462010622286,
 0.09533741271910762,
 0.0465257220088

In [32]:
def combine_X_normalized(data_X_normalized_list):
    """Stack several per-sample feature lists into one 2-D array.

    Args:
        data_X_normalized_list: Non-empty sequence of equally long sequences,
            one per feature. The first one determines the number of samples.

    Returns:
        A numpy array whose row ``i`` holds element ``i`` of every feature
        sequence, in the order the features were given.

    Raises:
        IndexError: If ``data_X_normalized_list`` is empty or a later feature
            sequence is shorter than the first one.
    """
    sample_count = len(data_X_normalized_list[0])
    rows = [
        [feature[sample] for feature in data_X_normalized_list]
        for sample in range(sample_count)
    ]
    return np.array(rows)

In [33]:
# Turn the list of means into a column array (one row per group, one feature).
data_rr24_normalized_year_avg_array = combine_X_normalized([data_rr24_normalized_year_avg])

In [34]:
# Inspect the normalized yearly feature matrix.
data_rr24_normalized_year_avg_array

array([[0.12591321],
       [0.09606294],
       [0.08876652],
       [0.06357396],
       [0.07655692],
       [0.07977151],
       [0.13804513],
       [0.05699289],
       [0.10106367],
       [0.09048564],
       [0.07969307],
       [0.0835897 ],
       [0.08762426],
       [0.06565825],
       [0.06914357],
       [0.07212436],
       [0.05638156],
       [0.08847887],
       [0.156419  ],
       [0.03838645],
       [0.08924368],
       [0.11318549],
       [0.06764183],
       [0.08050131],
       [0.15872575],
       [0.07639193],
       [0.09086837],
       [0.10531618],
       [0.09515089],
       [0.05259638],
       [0.07162973],
       [0.09493433],
       [0.05589157],
       [0.05942325],
       [0.04158179],
       [0.04661804],
       [0.21030862],
       [0.08837262],
       [0.14099955],
       [0.11605304],
       [0.10837552],
       [0.07066461],
       [0.13169272],
       [0.06114646],
       [0.09533741],
       [0.04652572],
       [0.07134269],
       [0.087

In [35]:
# Expand the normalized yearly feature into x, x**2 and x**3 (list of lists).
data_rr24_normalized_year_avg_power_array = add_power(data_rr24_normalized_year_avg_array, [3])

In [36]:
# Inspect the expanded normalized yearly features: each row is [x, x**2, x**3].
data_rr24_normalized_year_avg_power_array

[[0.125913206146113, 0.01585413548199355, 0.0019962450292126586],
 [0.09606293566178407, 0.009228087607960067, 0.0008864771861647748],
 [0.08876651648234955, 0.007879494448411234, 0.0006994352738274777],
 [0.06357395881334284, 0.004041648239200612, 0.00025694357869695926],
 [0.07655691597279554, 0.0058609613832656775, 0.00044869712813846996],
 [0.07977150756969045, 0.006363493419941181, 0.0005076254635185133],
 [0.13804513353799855, 0.01905645889352385, 0.00263065141271788],
 [0.05699288968045912, 0.0032481894741289833, 0.00018512370436026167],
 [0.10106367428793604, 0.010213866260578025, 0.0010322508529795969],
 [0.09048563599468619, 0.008187650321362849, 0.0007408647466306141],
 [0.079693072858715, 0.0063509858616644585, 0.0005061295789982945],
 [0.08358969979230228, 0.006987237911367221, 0.0005840611193885792],
 [0.08762426207375308, 0.007678011303969763, 0.0006727800747042851],
 [0.06565824858105673, 0.004311005606731838, 0.00028305307776112834],
 [0.06914356678903924, 0.0047808328

# Month, Normalized — min-max normalization per station, year and month, then averaged

In [37]:
def normalize_month(consider_name):
    """Min-max normalize one column separately for every station, year and month.

    Iterates over the global ``stations`` and ``years`` and over months 1-12;
    combinations without rows in the global ``data`` frame are skipped. Within
    each group the values are scaled to ``(x - min) / (max - min)``. When the
    spread is not larger than ``1.0e-15`` the group is replaced by an array of
    ones of the same length.

    Args:
        consider_name: Name of the column of ``data`` to normalize.

    Returns:
        A tuple ``(data_year, data_month, data_province, data_X_normalized_month)``
        of four parallel lists with one entry per non-empty group: the year, the
        month, the station's province from the global ``provinces`` dict, and the
        numpy array of normalized values.
    """
    data_year, data_month, data_province = [], [], []
    data_X_normalized_month = []

    for station in stations:
        rows_of_station = data[data["id_sta"] == station]

        for year in years:
            # The year filter does not depend on the month, so build it once per year.
            rows_of_year = rows_of_station[rows_of_station["year"] == year]

            for month in range(1, 13):
                rows = rows_of_year[rows_of_year["month"] == month]
                if len(rows) == 0:
                    continue

                values = rows[consider_name]
                lowest, highest = min(values), max(values)
                spread = highest - lowest

                data_year.append(year)
                data_month.append(month)
                data_province.append(provinces[station])

                if spread > 1.0e-15:
                    scaled = ((values - lowest) / spread).values
                else:
                    # (Nearly) constant group: avoid dividing by zero.
                    scaled = np.ones(len(values), dtype=int)
                data_X_normalized_month.append(scaled)

    return data_year, data_month, data_province, data_X_normalized_month

In [38]:
# Normalize "rr24" per station, year and month.
# NOTE(review): this rebinds `data_year`, `data_month` and `data_province` again;
# cells that were written against the earlier meaning of these names must not be
# re-run after this one.
data_year, data_month, data_province, data_rr24_normalized_month = normalize_month("rr24")

In [39]:
# Inspect the normalized values: one numpy array per (station, year, month) group.
data_rr24_normalized_month

[array([0.0890135 , 0.15205793, 0.06821646, 0.02553354, 0.00457317,
        0.04992378, 0.02782012, 0.03391768, 1.        , 0.32317073,
        0.2808689 , 0.61775915, 0.24809451, 0.48818598, 0.57240854,
        0.08307927, 0.04839939, 0.20541159, 0.55221037, 0.08650915,
        0.00419207, 0.        , 0.01181402, 0.00342988, 0.        ,
        0.00685976, 0.00685976, 0.04382622, 0.01257622, 0.0308689 ]),
 array([0.02068474, 0.03352354, 0.48430813, 0.00784593, 0.04636234,
        0.04065621, 0.02211127, 0.05278174, 0.01569187, 0.04636234,
        0.00356633, 0.02639087, 0.02639087, 0.01905441, 0.0192582 ,
        0.00570613, 0.00356633, 0.01783167, 0.02211127, 0.21184023,
        0.09201141, 1.        , 0.44365193, 0.29743224, 0.04707561,
        0.        , 0.03281027, 0.010699  , 0.03922967, 0.00713267,
        0.01212553]),
 array([0.00530824, 0.00168899, 0.00892749, 0.1621426 , 0.54156111,
        0.11086983, 0.01134033, 0.04150078, 0.01918205, 0.04632646,
        0.6410906 , 0.71

In [40]:
# Collapse every (station, year, month) group of normalized values to its mean.
data_rr24_normalized_month_avg = avg_X_normalized(data_rr24_normalized_month)

In [41]:
# Inspect the per-group means of the normalized monthly values.
data_rr24_normalized_month_avg

[0.1692526858304298,
 0.10026492765437131,
 0.2160189350200208,
 0.23039534903054,
 0.20685436820115183,
 0.19545568666834043,
 0.1177479944742008,
 0.12896051974012993,
 0.13782505278349416,
 0.05211930926216639,
 0.0637763440860215,
 0.19355642348554256,
 0.06624326404926867,
 0.0832131109700502,
 0.1805815160955348,
 0.26882644994868665,
 0.23614820902677902,
 0.15391294281083587,
 0.22850550694353758,
 0.14887005649717514,
 0.21290620852161726,
 0.1526805276101051,
 0.06125182047976966,
 0.18389119223379508,
 0.18760402978537014,
 0.21634746655224876,
 0.18545568039950064,
 0.2656008576924289,
 0.15561682443402874,
 0.35798234100942616,
 0.19088975540588446,
 0.13904267589388697,
 0.06591049258573997,
 0.11610904584882281,
 0.1092375366568915,
 0.10596746994505794,
 0.1310726310726311,
 0.42345965668795094,
 0.13752566934015265,
 0.30462601914214815,
 0.17318019281553645,
 0.16930565329580105,
 0.19938668396532402,
 0.09604978354978355,
 0.24091307432291456,
 0.2242065742916565,
 0

In [42]:
# Turn the list of means into a column array (one row per group, one feature).
data_rr24_normalized_month_avg_array = combine_X_normalized([data_rr24_normalized_month_avg])

In [43]:
# Inspect the normalized monthly feature matrix.
data_rr24_normalized_month_avg_array

array([[0.16925269],
       [0.10026493],
       [0.21601894],
       ...,
       [0.09225473],
       [0.08853681],
       [1.        ]])

In [44]:
# Expand the normalized monthly feature into x, x**2 and x**3 (list of lists).
data_rr24_normalized_month_avg_power_array = add_power(data_rr24_normalized_month_avg_array, [3])

In [45]:
# Inspect the expanded normalized monthly features: each row is [x, x**2, x**3].
data_rr24_normalized_month_avg_power_array

[[0.1692526858304298, 0.02864647166081417, 0.0048484922681580915],
 [0.10026492765437131, 0.010053055717536313, 0.0010079689042241423],
 [0.2160189350200208, 0.04666418028718398, 0.010080346529219731],
 [0.23039534903054, 0.053082016854904344, 0.012229849800530693],
 [0.20685436820115183, 0.04278872964389769, 0.008851035636618354],
 [0.19545568666834043, 0.03820292545099247, 0.007466979026763152],
 [0.1177479944742008, 0.013864590202696422, 0.0016325276905741568],
 [0.12896051974012993, 0.01663081565164444, 0.0021447186301383546],
 [0.13782505278349416, 0.01899574517477295, 0.0026180895813748867],
 [0.05211930926216639, 0.002716422397965343, 0.00014157805904623134],
 [0.0637763440860215, 0.004067422064978609, 0.0002594053091591518],
 [0.19355642348554256, 0.03746408907251469, 0.0072514150900197406],
 [0.06624326404926867, 0.0043881700319011304, 0.0002906867061163143],
 [0.0832131109700502, 0.006924421837313889, 0.0005762026827518395],
 [0.1805815160955348, 0.032609683955361894, 0.00588

In [3]:
# NOTE(review): scratch cell; `a` is not used by any other cell shown here, and
# its execution count (3) is out of sequence with the cells above.
a = np.array([1,2,3])

In [5]:
# Scratch cell. `random` is not imported anywhere else in the notebook, so the
# import lives here to keep the cell runnable on a fresh kernel.
import random

random.random()

1