In [16]:
import pandas as pd
import numpy as np

In [82]:
# Lowercase region keys. The loader cells interpolate each key into the
# "<region>_yields_df.csv" and "<region>_climate_df.csv" file names and use
# it as the dictionary key for the loaded frame.
REGIONS = ["asahi", "ichihara", "katori", "narita", "sanmu"]
# Relative directory prefix (note the trailing slash) that is string-concatenated
# in front of those file names.
RAW_DIR = '../data/raw/'


In [83]:
# Read each region's yield CSV into a dict keyed by the lowercase region name.
# The first CSV column is used as the frame's index.
yield_dfs = {}
for region in REGIONS:
    csv_path = f"{RAW_DIR}{region}_yields_df.csv"
    yield_dfs[region] = pd.read_csv(csv_path, index_col=0)

# Quick look at the first ten rows of one region.
print(yield_dfs['asahi'].head(10))

    Year  CityId City  Yields
31  2023       2   旭市     585
32  2022       2   旭市     573
33  2021       2   旭市     574
34  2020       2   旭市     568
35  2019       2   旭市     538
36  2018       2   旭市     567
37  2017       2   旭市     572
38  2016       2   旭市     584
39  2015       2   旭市     565
40  2014       2   旭市     588


In [84]:
# Stack the per-region yield tables into one long frame.
# `yield_dfs` was filled by iterating REGIONS, and dicts keep insertion order,
# so the rows come out in the same region order as the previous hard-coded list
# without repeating the region names here.
# `CityId` is dropped and the index is rebuilt as 0..n-1 because every regional
# frame carries its own index labels.
combined_yield_df = (
    pd.concat(list(yield_dfs.values()), axis=0)
    .drop(columns=['CityId'])
    .reset_index(drop=True)
)


In [85]:
# Persist the combined (still unscaled) yield table as CSV. The index is written
# too, so it becomes an unnamed first column when the file is read back.
# NOTE(review): this derived file is written into RAW_DIR next to the source
# CSVs -- confirm that is intended rather than the processed directory.
combined_yield_df.to_csv(RAW_DIR + 'combined_yield_df.csv')

In [86]:
from sklearn.preprocessing import StandardScaler

# Standardise the yield column over all regions and years together.
# The scaler is fitted on a single-column frame (2-D input) and its output
# overwrites the original `Yields` values.
scaler = StandardScaler()
raw_yields = combined_yield_df[['Yields']]
combined_yield_df['Yields'] = scaler.fit_transform(raw_yields)

combined_yield_df.head(10)

Unnamed: 0,Year,City,Yields
0,2023,旭市,1.412861
1,2022,旭市,1.063267
2,2021,旭市,1.0924
3,2020,旭市,0.917603
4,2019,旭市,0.043617
5,2018,旭市,0.88847
6,2017,旭市,1.034134
7,2016,旭市,1.383728
8,2015,旭市,0.830204
9,2014,旭市,1.500259


In [87]:
# Switch to the lowercase column names used by the climate table
# (`City` becomes `region`) and rebuild the positional index.
combined_yield_df = (
    combined_yield_df
    .rename(columns={'Yields': 'yield', 'Year': 'year', 'City': 'region'})
    .reset_index(drop=True)
)
combined_yield_df.head(10)

Unnamed: 0,year,region,yield
0,2023,旭市,1.412861
1,2022,旭市,1.063267
2,2021,旭市,1.0924
3,2020,旭市,0.917603
4,2019,旭市,0.043617
5,2018,旭市,0.88847
6,2017,旭市,1.034134
7,2016,旭市,1.383728
8,2015,旭市,0.830204
9,2014,旭市,1.500259


In [88]:
# Order rows by region name, then by year, both descending.
combined_yield_df = combined_yield_df.sort_values(by=['region', 'year'], ascending=False)
combined_yield_df.head(10)

Unnamed: 0,year,region,yield
92,2023,香取市,1.208931
91,2022,香取市,0.859337
90,2021,香取市,0.917603
89,2020,香取市,0.713673
88,2019,香取市,-0.335109
87,2018,香取市,0.68454
86,2017,香取市,0.801071
85,2016,香取市,1.267197
84,2015,香取市,0.538876
83,2014,香取市,1.354595


In [89]:
# Add an empty placeholder column `region_id` at position 1 (directly after
# `year`); a later cell fills it with numeric ids.
# `insert` mutates the frame in place and, by default, refuses to add a column
# name that already exists, so this cell cannot simply be run twice.
combined_yield_df.insert(1, 'region_id', None)
combined_yield_df.head(10)

Unnamed: 0,year,region_id,region,yield
92,2023,,香取市,1.208931
91,2022,,香取市,0.859337
90,2021,,香取市,0.917603
89,2020,,香取市,0.713673
88,2019,,香取市,-0.335109
87,2018,,香取市,0.68454
86,2017,,香取市,0.801071
85,2016,,香取市,1.267197
84,2015,,香取市,0.538876
83,2014,,香取市,1.354595


REGIONS = ["asahi", "ichihara", "katori", "narita", "sanmu"]


In [90]:
REGIONS_KANJI = ['旭市', '市原市', '香取市', '成田市', '山武市']

# Kanji name -> 1-based position in REGIONS_KANJI, built once so that mapping a
# whole column does not rescan the list for every row.
_REGION_ID_BY_KANJI = {name: pos for pos, name in enumerate(REGIONS_KANJI, start=1)}


def region_id(region_str):
    """Return the 1-based id of a region given its kanji name.

    Parameters
    ----------
    region_str : str
        A region name exactly as listed in ``REGIONS_KANJI``.

    Returns
    -------
    int
        Position of ``region_str`` in ``REGIONS_KANJI``, counting from 1.

    Raises
    ------
    ValueError
        If ``region_str`` is not one of the known regions.
    """
    try:
        return _REGION_ID_BY_KANJI[region_str]
    except (KeyError, TypeError):
        # Keep ValueError (what list.index raised) but say which value was
        # rejected and what would have been accepted.
        raise ValueError(
            f"unknown region {region_str!r}; expected one of {REGIONS_KANJI}"
        ) from None
# Fill the `region_id` column with each row's numeric region id.
# Bracket assignment is used on purpose: attribute-style assignment
# (`df.region_id = ...`) only updates a column that already exists and otherwise
# just sets a plain Python attribute on the frame. The function is passed
# directly instead of being wrapped in a lambda.
combined_yield_df['region_id'] = combined_yield_df['region'].map(region_id)
combined_yield_df.head(10)

Unnamed: 0,year,region_id,region,yield
92,2023,3,香取市,1.208931
91,2022,3,香取市,0.859337
90,2021,3,香取市,0.917603
89,2020,3,香取市,0.713673
88,2019,3,香取市,-0.335109
87,2018,3,香取市,0.68454
86,2017,3,香取市,0.801071
85,2016,3,香取市,1.267197
84,2015,3,香取市,0.538876
83,2014,3,香取市,1.354595


In [91]:
# Final row order: region id ascending, and newest year first within a region.
combined_yield_df = combined_yield_df.sort_values(
    by=['region_id', 'year'],
    ascending=[True, False],
)
combined_yield_df.head(10)

Unnamed: 0,year,region_id,region,yield
0,2023,1,旭市,1.412861
1,2022,1,旭市,1.063267
2,2021,1,旭市,1.0924
3,2020,1,旭市,0.917603
4,2019,1,旭市,0.043617
5,2018,1,旭市,0.88847
6,2017,1,旭市,1.034134
7,2016,1,旭市,1.383728
8,2015,1,旭市,0.830204
9,2014,1,旭市,1.500259


In [92]:
# Replace the index left over from sorting with a fresh 0..n-1 range.
combined_yield_df = combined_yield_df.reset_index(drop=True)
combined_yield_df.head(10)

Unnamed: 0,year,region_id,region,yield
0,2023,1,旭市,1.412861
1,2022,1,旭市,1.063267
2,2021,1,旭市,1.0924
3,2020,1,旭市,0.917603
4,2019,1,旭市,0.043617
5,2018,1,旭市,0.88847
6,2017,1,旭市,1.034134
7,2016,1,旭市,1.383728
8,2015,1,旭市,0.830204
9,2014,1,旭市,1.500259


In [93]:
# Romanised region names, listed in region-id order (id 1 is the first entry).
REGIONS_CAP = ['Asahi', 'Ichihara', 'Katori', 'Narita', 'Sanmu']

# Overwrite the kanji `region` values with the romanised name for the row's
# id; ids are 1-based, hence the -1.
combined_yield_df['region'] = combined_yield_df['region_id'].map(lambda rid: REGIONS_CAP[rid - 1])
combined_yield_df.head(10)

Unnamed: 0,year,region_id,region,yield
0,2023,1,Asahi,1.412861
1,2022,1,Asahi,1.063267
2,2021,1,Asahi,1.0924
3,2020,1,Asahi,0.917603
4,2019,1,Asahi,0.043617
5,2018,1,Asahi,0.88847
6,2017,1,Asahi,1.034134
7,2016,1,Asahi,1.383728
8,2015,1,Asahi,0.830204
9,2014,1,Asahi,1.500259


In [94]:
# The numeric id was only needed for ordering and renaming; remove it again.
combined_yield_df = combined_yield_df.drop(columns=['region_id'])

In [95]:
# Save the scaled yield table (columns: year, region, yield); the index is
# written as an unnamed first column.
combined_yield_df.to_csv('../data/processed/scaled_yield_df.csv')
# Short alias for the same frame object (not a copy), used by the merge cell below.
yld = combined_yield_df

In [96]:
# Read each region's monthly climate CSV into a dict keyed by the region name.
# The first CSV column is read as the index and then discarded in favour of a
# plain 0..n-1 range.
climate_dfs = {}
for region in REGIONS:
    csv_path = f"{RAW_DIR}{region}_climate_df.csv"
    climate_dfs[region] = pd.read_csv(csv_path, index_col=0).reset_index(drop=True)

# Quick look at the first ten rows of one region.
print(climate_dfs['asahi'].head(10))
        

   valid_time   city    temp_2m  soil_temp_l1  soil_water_vol_l1  \
0  2023-12-01  Asahi  283.48364     283.47485           0.554549   
1  2023-11-01  Asahi  288.43823     288.65186           0.547770   
2  2023-10-01  Asahi  292.17920     292.50635           0.584982   
3  2023-09-01  Asahi  299.33276     299.68896           0.517402   
4  2023-08-01  Asahi  301.20215     302.06885           0.303744   
5  2023-07-01  Asahi  299.70557     300.10034           0.498277   
6  2023-06-01  Asahi  295.30030     295.37354           0.672630   
7  2023-05-01  Asahi  291.23390     291.53150           0.646380   
8  2023-04-01  Asahi  288.59620     288.88452           0.633264   
9  2023-03-01  Asahi  285.72680     285.79736           0.643218   

   net_solar_radiation  total_rain  
0            6354285.0    0.000808  
1            7189851.0    0.001133  
2            9564114.0    0.002618  
3           10485446.0    0.002806  
4           14206971.0    0.000805  
5           14889880.0    0.0

In [97]:
# Stack the per-region climate tables into one long frame.
# `climate_dfs` was filled by iterating REGIONS, so its insertion order already
# matches the previous hard-coded list.
# ignore_index=True gives every row a unique label; without it each region
# keeps its own 0..N labels, so a label-based lookup or drop on `clm` would hit
# one row in every region at once.
clm = pd.concat(list(climate_dfs.values()), axis=0, ignore_index=True)
clm.head(10)

Unnamed: 0,valid_time,city,temp_2m,soil_temp_l1,soil_water_vol_l1,net_solar_radiation,total_rain
0,2023-12-01,Asahi,283.48364,283.47485,0.554549,6354285.0,0.000808
1,2023-11-01,Asahi,288.43823,288.65186,0.54777,7189851.0,0.001133
2,2023-10-01,Asahi,292.1792,292.50635,0.584982,9564114.0,0.002618
3,2023-09-01,Asahi,299.33276,299.68896,0.517402,10485446.0,0.002806
4,2023-08-01,Asahi,301.20215,302.06885,0.303744,14206971.0,0.000805
5,2023-07-01,Asahi,299.70557,300.10034,0.498277,14889880.0,0.000591
6,2023-06-01,Asahi,295.3003,295.37354,0.67263,11954429.0,0.004443
7,2023-05-01,Asahi,291.2339,291.5315,0.64638,12810040.0,0.00288
8,2023-04-01,Asahi,288.5962,288.88452,0.633264,13007223.0,0.002593
9,2023-03-01,Asahi,285.7268,285.79736,0.643218,10119643.0,0.002134


In [98]:
# Use `region` as the column name so it matches the yield table's join key.
clm = clm.rename(columns={'city': 'region'})
clm.head(10)

Unnamed: 0,valid_time,region,temp_2m,soil_temp_l1,soil_water_vol_l1,net_solar_radiation,total_rain
0,2023-12-01,Asahi,283.48364,283.47485,0.554549,6354285.0,0.000808
1,2023-11-01,Asahi,288.43823,288.65186,0.54777,7189851.0,0.001133
2,2023-10-01,Asahi,292.1792,292.50635,0.584982,9564114.0,0.002618
3,2023-09-01,Asahi,299.33276,299.68896,0.517402,10485446.0,0.002806
4,2023-08-01,Asahi,301.20215,302.06885,0.303744,14206971.0,0.000805
5,2023-07-01,Asahi,299.70557,300.10034,0.498277,14889880.0,0.000591
6,2023-06-01,Asahi,295.3003,295.37354,0.67263,11954429.0,0.004443
7,2023-05-01,Asahi,291.2339,291.5315,0.64638,12810040.0,0.00288
8,2023-04-01,Asahi,288.5962,288.88452,0.633264,13007223.0,0.002593
9,2023-03-01,Asahi,285.7268,285.79736,0.643218,10119643.0,0.002134


In [99]:
# NOTE(review): this rebinds REGIONS, which the loader cells above use (in
# lowercase) to build CSV file names and dict keys. The names here are
# capitalised and in a different order from REGIONS_CAP (Narita comes before
# Katori). No cell visible below reads REGIONS again -- confirm whether this
# assignment is needed. Re-running a loader cell after this one would look for
# files such as "Asahi_climate_df.csv".
REGIONS = ['Asahi', 'Ichihara', 'Narita', 'Katori', 'Sanmu']

In [100]:
"""Transform date to year and month"""
# Parse `valid_time` once, then store the timestamp plus its calendar year and
# month as separate columns (month drives the stage split, year the grouping).
parsed_dates = pd.to_datetime(clm['valid_time'])
clm['date'] = parsed_dates
clm['year'] = parsed_dates.dt.year
clm['month'] = parsed_dates.dt.month
clm

Unnamed: 0,valid_time,region,temp_2m,soil_temp_l1,soil_water_vol_l1,net_solar_radiation,total_rain,date,year,month
0,2023-12-01,Asahi,283.483640,283.474850,0.554549,6.354285e+06,0.000808,2023-12-01,2023,12
1,2023-11-01,Asahi,288.438230,288.651860,0.547770,7.189851e+06,0.001133,2023-11-01,2023,11
2,2023-10-01,Asahi,292.179200,292.506350,0.584982,9.564114e+06,0.002618,2023-10-01,2023,10
3,2023-09-01,Asahi,299.332760,299.688960,0.517402,1.048545e+07,0.002806,2023-09-01,2023,9
4,2023-08-01,Asahi,301.202150,302.068850,0.303744,1.420697e+07,0.000805,2023-08-01,2023,8
...,...,...,...,...,...,...,...,...,...,...
403,1990-05-01,Sanmu,290.777947,290.805650,0.399937,1.606790e+07,0.003068,1990-05-01,1990,5
404,1990-04-01,Sanmu,286.536667,286.543010,0.418977,1.409122e+07,0.004302,1990-04-01,1990,4
405,1990-03-01,Sanmu,282.573817,282.909043,0.385746,1.402810e+07,0.002267,1990-03-01,1990,3
406,1990-02-01,Sanmu,280.447720,280.441483,0.444024,6.946390e+06,0.004161,1990-02-01,1990,2


In [101]:
# Calendar months that make up each rice-growing stage. November and December
# belong to no stage, so those rows are not used by any subset below.
STAGES = dict(
    prep_=[1, 2, 3, 4],  # Jan-Apr: preparation
    plnt_=[5],           # May: rice planting
    grow_=[6, 7],        # Jun-Jul: growth
    head_=[8, 9],        # Aug-Sep: heading / ripening
    hvst_=[10],          # Oct: harvest
)

# Split the monthly climate rows into one subset per growth stage.
row_month = clm['month']

prep_data = clm[row_month.isin(STAGES['prep_'])]
plnt_data = clm[row_month.isin(STAGES['plnt_'])]
grow_data = clm[row_month.isin(STAGES['grow_'])]
head_data = clm[row_month.isin(STAGES['head_'])]
hvst_data = clm[row_month.isin(STAGES['hvst_'])]

In [102]:
# Spot-check the growth-stage subset: only rows with month 6 or 7 should appear.
grow_data.head(10)

Unnamed: 0,valid_time,region,temp_2m,soil_temp_l1,soil_water_vol_l1,net_solar_radiation,total_rain,date,year,month
5,2023-07-01,Asahi,299.70557,300.10034,0.498277,14889880.0,0.000591,2023-07-01,2023,7
6,2023-06-01,Asahi,295.3003,295.37354,0.67263,11954429.0,0.004443,2023-06-01,2023,6
17,2022-07-01,Asahi,298.97437,299.25,0.60587,18267416.0,0.006485,2022-07-01,2022,7
18,2022-06-01,Asahi,294.95044,295.24194,0.619105,18920916.0,0.002721,2022-06-01,2022,6
29,2021-07-01,Asahi,297.9685,298.18994,0.663525,16967882.0,0.009666,2021-07-01,2021,7
30,2021-06-01,Asahi,294.78223,295.10913,0.590483,17372984.0,0.004754,2021-06-01,2021,6
41,2020-07-01,Asahi,297.28564,297.22876,0.714153,11487241.0,0.009721,2020-07-01,2020,7
42,2020-06-01,Asahi,295.51855,295.77808,0.650197,16321864.0,0.008005,2020-06-01,2020,6
53,2019-07-01,Asahi,296.8733,296.9375,0.693243,14084238.0,0.00761,2019-07-01,2019,7
54,2019-06-01,Asahi,294.26514,294.56445,0.64074,17132032.0,0.00667,2019-06-01,2019,6


In [103]:
"""育成ステージごとに集計"""

# Collapse each stage's monthly rows to one row per (region, year).
# All five stages use the same recipe, so it is defined once here instead of
# being copy-pasted per stage.

# Temperatures and soil moisture are averaged over the stage's months;
# radiation and rainfall are totalled.
STAGE_AGGREGATIONS = {
    'temp_2m': 'mean',
    'soil_temp_l1': 'mean',
    'soil_water_vol_l1': 'mean',
    'net_solar_radiation': 'sum',
    'total_rain': 'sum',
}


def aggregate_stage(stage_df):
    """Aggregate one stage's monthly climate rows per region and year.

    Parameters
    ----------
    stage_df : pandas.DataFrame
        Monthly rows of a single growth stage. Must contain ``region``,
        ``year`` and every column named in ``STAGE_AGGREGATIONS``.

    Returns
    -------
    pandas.DataFrame
        One row per (region, year) with the columns ``region``, ``year`` and
        the aggregated measurements in ``STAGE_AGGREGATIONS`` order, on a
        fresh 0..n-1 index.
    """
    return stage_df.groupby(['region', 'year']).agg(STAGE_AGGREGATIONS).reset_index()


agg_prep = aggregate_stage(prep_data)
agg_plnt = aggregate_stage(plnt_data)
agg_grow = aggregate_stage(grow_data)
agg_head = aggregate_stage(head_data)
agg_hvst = aggregate_stage(hvst_data)

agg_hvst.head(10)

Unnamed: 0,region,year,temp_2m,soil_temp_l1,soil_water_vol_l1,net_solar_radiation,total_rain
0,Asahi,1990,292.10376,292.28052,0.635731,10271036.0,0.004837
1,Asahi,1991,291.60645,291.78247,0.713997,7884307.0,0.014973
2,Asahi,1992,290.56104,290.70142,0.681179,10177077.0,0.009636
3,Asahi,1993,290.31396,290.4016,0.674495,10533897.0,0.005869
4,Asahi,1994,292.6709,292.80835,0.64127,9785443.0,0.003555
5,Asahi,1995,292.03662,292.0647,0.631168,11153036.0,0.003198
6,Asahi,1996,290.71997,290.6438,0.672642,10998594.0,0.00324
7,Asahi,1997,290.85742,291.09448,0.541491,13196655.0,0.00157
8,Asahi,1998,293.09863,293.24536,0.678877,10984828.0,0.003808
9,Asahi,1999,292.28125,292.69434,0.463348,10841017.0,0.00435


In [104]:

"""カラムを再命名"""

# Prefix every measurement column with its stage tag so the five tables can be
# merged side by side without name clashes. The first two columns are the
# region/year keys and keep their names.
# Like the direct assignments this replaces, the relabelling is applied in
# place, so running the cell twice would prefix the columns twice.
KEY_COLUMNS = ['region', 'year']
for stage_prefix, stage_frame in (
    ('prep_', agg_prep),
    ('grow_', agg_grow),
    ('plnt_', agg_plnt),
    ('head_', agg_head),
    ('hvst_', agg_hvst),
):
    stage_frame.columns = KEY_COLUMNS + [f'{stage_prefix}{col}' for col in stage_frame.columns[2:]]

agg_hvst

Unnamed: 0,region,year,hvst_temp_2m,hvst_soil_temp_l1,hvst_soil_water_vol_l1,hvst_net_solar_radiation,hvst_total_rain
0,Asahi,1990,292.103760,292.280520,0.635731,1.027104e+07,0.004837
1,Asahi,1991,291.606450,291.782470,0.713997,7.884307e+06,0.014973
2,Asahi,1992,290.561040,290.701420,0.681179,1.017708e+07,0.009636
3,Asahi,1993,290.313960,290.401600,0.674495,1.053390e+07,0.005869
4,Asahi,1994,292.670900,292.808350,0.641270,9.785443e+06,0.003555
...,...,...,...,...,...,...,...
165,Sanmu,2019,292.700990,293.172830,0.401950,9.093761e+06,0.009707
166,Sanmu,2020,290.419040,290.970547,0.415692,8.081537e+06,0.008664
167,Sanmu,2021,291.082290,291.677160,0.423054,9.797489e+06,0.007006
168,Sanmu,2022,290.701223,291.260700,0.410848,7.201774e+06,0.002366


In [105]:
"""ステージ分けしたものを集計"""

# Join the five per-stage tables on (region, year), left to right in stage
# order: preparation, planting, growth, heading, harvest.
agg_data = agg_prep
for stage_table in (agg_plnt, agg_grow, agg_head, agg_hvst):
    agg_data = agg_data.merge(stage_table, on=['region', 'year'])
agg_data

Unnamed: 0,region,year,prep_temp_2m,prep_soil_temp_l1,prep_soil_water_vol_l1,prep_net_solar_radiation,prep_total_rain,plnt_temp_2m,plnt_soil_temp_l1,plnt_soil_water_vol_l1,...,head_temp_2m,head_soil_temp_l1,head_soil_water_vol_l1,head_net_solar_radiation,head_total_rain,hvst_temp_2m,hvst_soil_temp_l1,hvst_soil_water_vol_l1,hvst_net_solar_radiation,hvst_total_rain
0,Asahi,1990,282.461243,282.612365,0.662260,4.551346e+07,0.015188,290.802980,291.058840,0.637171,...,298.554200,299.188600,0.378798,3.384898e+07,0.006725,292.103760,292.280520,0.635731,1.027104e+07,0.004837
1,Asahi,1991,282.004518,282.206428,0.635963,5.055789e+07,0.012777,290.644530,291.097170,0.574348,...,297.136110,297.487915,0.622159,2.777427e+07,0.012765,291.606450,291.782470,0.713997,7.884307e+06,0.014973
2,Asahi,1992,282.343628,282.492493,0.665439,4.733209e+07,0.018756,289.493160,289.940920,0.660066,...,296.919310,297.469115,0.422992,3.238540e+07,0.003984,290.561040,290.701420,0.681179,1.017708e+07,0.009636
3,Asahi,1993,281.706973,282.013978,0.665435,5.160159e+07,0.016412,289.971200,290.321300,0.604974,...,296.350470,296.450315,0.662248,2.596279e+07,0.011812,290.313960,290.401600,0.674495,1.053390e+07,0.005869
4,Asahi,1994,281.167113,281.453980,0.657792,5.232052e+07,0.015580,291.502440,291.631100,0.592175,...,298.788210,299.501465,0.451333,3.355636e+07,0.009248,292.670900,292.808350,0.641270,9.785443e+06,0.003555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,Sanmu,2019,281.608318,281.759388,0.393054,4.725456e+07,0.011992,291.661863,291.563710,0.375617,...,299.065870,299.399733,0.336853,3.046980e+07,0.006204,292.700990,293.172830,0.401950,9.093761e+06,0.009707
166,Sanmu,2020,282.359143,282.662871,0.417121,4.690619e+07,0.014675,291.865773,291.868460,0.397321,...,299.116635,299.561637,0.319046,3.041366e+07,0.004273,290.419040,290.970547,0.415692,8.081537e+06,0.008664
167,Sanmu,2021,282.719350,282.935262,0.378554,5.117995e+07,0.011013,292.152200,292.200233,0.361492,...,297.424150,297.626685,0.408903,2.535146e+07,0.013081,291.082290,291.677160,0.423054,9.797489e+06,0.007006
168,Sanmu,2022,281.335901,281.546596,0.401751,4.797231e+07,0.014670,291.124483,291.110290,0.415453,...,298.412457,298.750322,0.387466,2.483614e+07,0.006806,290.701223,291.260700,0.410848,7.201774e+06,0.002366


In [106]:
"""収穫のデータがない1992年以前をDROP"""

# Keep only the rows from 1993 onward (there is no harvest data for 1992 and
# earlier). The surviving rows keep their original index labels.
from_first_harvest_year = ~(agg_data['year'] < 1993)
new_agg_data = agg_data[from_first_harvest_year]
new_agg_data

Unnamed: 0,region,year,prep_temp_2m,prep_soil_temp_l1,prep_soil_water_vol_l1,prep_net_solar_radiation,prep_total_rain,plnt_temp_2m,plnt_soil_temp_l1,plnt_soil_water_vol_l1,...,head_temp_2m,head_soil_temp_l1,head_soil_water_vol_l1,head_net_solar_radiation,head_total_rain,hvst_temp_2m,hvst_soil_temp_l1,hvst_soil_water_vol_l1,hvst_net_solar_radiation,hvst_total_rain
3,Asahi,1993,281.706973,282.013978,0.665435,5.160159e+07,0.016412,289.971200,290.321300,0.604974,...,296.350470,296.450315,0.662248,2.596279e+07,0.011812,290.313960,290.401600,0.674495,1.053390e+07,0.005869
4,Asahi,1994,281.167113,281.453980,0.657792,5.232052e+07,0.015580,291.502440,291.631100,0.592175,...,298.788210,299.501465,0.451333,3.355636e+07,0.009248,292.670900,292.808350,0.641270,9.785443e+06,0.003555
5,Asahi,1995,281.644045,281.878540,0.609441,5.173130e+07,0.011158,290.895750,291.156980,0.652623,...,297.966790,298.696535,0.398301,3.292247e+07,0.009369,292.036620,292.064700,0.631168,1.115304e+07,0.003198
6,Asahi,1996,280.903805,281.197510,0.583144,5.212202e+07,0.010072,289.760000,290.222660,0.560206,...,295.973025,296.422000,0.532310,3.019782e+07,0.012307,290.719970,290.643800,0.672642,1.099859e+07,0.003240
7,Asahi,1997,282.174742,282.406435,0.606504,5.324435e+07,0.010833,291.313720,291.622070,0.593961,...,297.166015,297.626465,0.480934,3.024237e+07,0.005503,290.857420,291.094480,0.541491,1.319666e+07,0.001570
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,Sanmu,2019,281.608318,281.759388,0.393054,4.725456e+07,0.011992,291.661863,291.563710,0.375617,...,299.065870,299.399733,0.336853,3.046980e+07,0.006204,292.700990,293.172830,0.401950,9.093761e+06,0.009707
166,Sanmu,2020,282.359143,282.662871,0.417121,4.690619e+07,0.014675,291.865773,291.868460,0.397321,...,299.116635,299.561637,0.319046,3.041366e+07,0.004273,290.419040,290.970547,0.415692,8.081537e+06,0.008664
167,Sanmu,2021,282.719350,282.935262,0.378554,5.117995e+07,0.011013,292.152200,292.200233,0.361492,...,297.424150,297.626685,0.408903,2.535146e+07,0.013081,291.082290,291.677160,0.423054,9.797489e+06,0.007006
168,Sanmu,2022,281.335901,281.546596,0.401751,4.797231e+07,0.014670,291.124483,291.110290,0.415453,...,298.412457,298.750322,0.387466,2.483614e+07,0.006806,290.701223,291.260700,0.410848,7.201774e+06,0.002366


In [107]:
# Save the per-stage climate aggregates for 1993 onward. The index is written
# as an unnamed first column and still carries the pre-filter labels, so it
# does not start at 0.
new_agg_data.to_csv('../data/processed/staged_climate_df.csv')


In [108]:
"""収穫データとマージ"""

# Attach the scaled yield to each (region, year) row of climate features.
# The join keys and join type are spelled out instead of relying on pandas'
# default of "every column name the two frames happen to share", so a future
# extra shared column cannot silently change the join.
# The inner join keeps only region/years present in both tables, which is also
# what removes the pre-1993 climate rows from the unfiltered `agg_data`.
data = pd.merge(agg_data, yld, on=['region', 'year'], how='inner')
data

Unnamed: 0,region,year,prep_temp_2m,prep_soil_temp_l1,prep_soil_water_vol_l1,prep_net_solar_radiation,prep_total_rain,plnt_temp_2m,plnt_soil_temp_l1,plnt_soil_water_vol_l1,...,head_soil_temp_l1,head_soil_water_vol_l1,head_net_solar_radiation,head_total_rain,hvst_temp_2m,hvst_soil_temp_l1,hvst_soil_water_vol_l1,hvst_net_solar_radiation,hvst_total_rain,yield
0,Asahi,1993,281.706973,282.013978,0.665435,5.160159e+07,0.016412,289.971200,290.321300,0.604974,...,296.450315,0.662248,2.596279e+07,0.011812,290.313960,290.401600,0.674495,1.053390e+07,0.005869,-3.160995
1,Asahi,1994,281.167113,281.453980,0.657792,5.232052e+07,0.015580,291.502440,291.631100,0.592175,...,299.501465,0.451333,3.355636e+07,0.009248,292.670900,292.808350,0.641270,9.785443e+06,0.003555,0.946735
2,Asahi,1995,281.644045,281.878540,0.609441,5.173130e+07,0.011158,290.895750,291.156980,0.652623,...,298.696535,0.398301,3.292247e+07,0.009369,292.036620,292.064700,0.631168,1.115304e+07,0.003198,-0.539039
3,Asahi,1996,280.903805,281.197510,0.583144,5.212202e+07,0.010072,289.760000,290.222660,0.560206,...,296.422000,0.532310,3.019782e+07,0.012307,290.719970,290.643800,0.672642,1.099859e+07,0.003240,-0.247711
4,Asahi,1997,282.174742,282.406435,0.606504,5.324435e+07,0.010833,291.313720,291.622070,0.593961,...,297.626465,0.480934,3.024237e+07,0.005503,290.857420,291.094480,0.541491,1.319666e+07,0.001570,0.509743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,Sanmu,2019,281.608318,281.759388,0.393054,4.725456e+07,0.011992,291.661863,291.563710,0.375617,...,299.399733,0.336853,3.046980e+07,0.006204,292.700990,293.172830,0.401950,9.093761e+06,0.009707,-0.335109
151,Sanmu,2020,282.359143,282.662871,0.417121,4.690619e+07,0.014675,291.865773,291.868460,0.397321,...,299.561637,0.319046,3.041366e+07,0.004273,290.419040,290.970547,0.415692,8.081537e+06,0.008664,0.538876
152,Sanmu,2021,282.719350,282.935262,0.378554,5.117995e+07,0.011013,292.152200,292.200233,0.361492,...,297.626685,0.408903,2.535146e+07,0.013081,291.082290,291.677160,0.423054,9.797489e+06,0.007006,0.742806
153,Sanmu,2022,281.335901,281.546596,0.401751,4.797231e+07,0.014670,291.124483,291.110290,0.415453,...,298.750322,0.387466,2.483614e+07,0.006806,290.701223,291.260700,0.410848,7.201774e+06,0.002366,0.713673


In [109]:
# List the columns at positions 2..26: the 25 stage-prefixed climate features
# that sit between the region/year keys and the trailing `yield` column.
data.columns[2:27]

Index(['prep_temp_2m', 'prep_soil_temp_l1', 'prep_soil_water_vol_l1',
       'prep_net_solar_radiation', 'prep_total_rain', 'plnt_temp_2m',
       'plnt_soil_temp_l1', 'plnt_soil_water_vol_l1',
       'plnt_net_solar_radiation', 'plnt_total_rain', 'grow_temp_2m',
       'grow_soil_temp_l1', 'grow_soil_water_vol_l1',
       'grow_net_solar_radiation', 'grow_total_rain', 'head_temp_2m',
       'head_soil_temp_l1', 'head_soil_water_vol_l1',
       'head_net_solar_radiation', 'head_total_rain', 'hvst_temp_2m',
       'hvst_soil_temp_l1', 'hvst_soil_water_vol_l1',
       'hvst_net_solar_radiation', 'hvst_total_rain'],
      dtype='object')

In [110]:

# Standardise the climate feature columns.
# Features are selected by name (everything except the keys and the target)
# rather than by the positional slice [2:27], so the selection stays correct if
# columns are added or reordered.
columns_to_scale = data.columns.drop(['region', 'year', 'yield'])

# A dedicated scaler is used here. Refitting the shared `scaler` would overwrite
# the statistics it learned from the yield column, and those are needed to turn
# scaled yields back into original units.
# NOTE(review): the scaler is fitted on every row; if a train/test split happens
# downstream, confirm whether it should be fitted on the training rows only.
feature_scaler = StandardScaler()
data[columns_to_scale] = feature_scaler.fit_transform(data[columns_to_scale])

data

Unnamed: 0,region,year,prep_temp_2m,prep_soil_temp_l1,prep_soil_water_vol_l1,prep_net_solar_radiation,prep_total_rain,plnt_temp_2m,plnt_soil_temp_l1,plnt_soil_water_vol_l1,...,head_soil_temp_l1,head_soil_water_vol_l1,head_net_solar_radiation,head_total_rain,hvst_temp_2m,hvst_soil_temp_l1,hvst_soil_water_vol_l1,hvst_net_solar_radiation,hvst_total_rain,yield
0,Asahi,1993,-0.060562,-0.003973,2.238262,0.959389,0.951731,-1.572648,-1.342187,1.813258,...,-1.779114,2.890841,-1.078292,0.628321,-1.455501,-1.898463,2.120006,0.689816,-0.110071,-3.160995
1,Asahi,1994,-0.805556,-0.793434,2.159424,1.180903,0.654922,0.434492,0.458058,1.678270,...,1.193804,0.613343,1.850544,-0.119432,1.678067,1.329429,1.796030,0.005523,-0.717270,0.946735
2,Asahi,1995,-0.147400,-0.194907,1.660719,0.999355,-0.923270,-0.360754,-0.193593,2.315816,...,0.409513,0.040694,1.606052,-0.084138,0.834788,0.332058,1.697519,1.255880,-0.810901,-0.539039
3,Asahi,1996,-1.168914,-1.154994,1.389486,1.119742,-1.310899,-1.849488,-1.477762,1.341087,...,-1.806703,1.487743,0.555155,0.772871,-0.915708,-1.573629,2.101935,1.114677,-0.799842,-0.247711
4,Asahi,1997,0.584949,0.549297,1.630429,1.465555,-1.039116,0.187119,0.445647,1.697100,...,-0.633121,0.932983,0.572341,-1.211379,-0.732967,-0.969184,0.823075,3.124311,-1.237997,0.509743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,Sanmu,2019,-0.196702,-0.362883,-0.571154,-0.380016,-0.625504,0.643463,0.365435,-0.605786,...,1.094681,-0.622836,0.660059,-1.007219,1.718072,1.818263,-0.537590,-0.626865,0.896926,-0.335109
151,Sanmu,2020,0.839417,0.910809,-0.322928,-0.487355,0.332045,0.910747,0.784296,-0.376874,...,1.252433,-0.815119,0.638405,-1.570382,-1.315796,-1.135402,-0.403589,-1.552317,0.623271,0.538876
152,Sanmu,2021,1.336494,1.294814,-0.720712,0.829473,-0.974916,1.286193,1.240299,-0.754766,...,-0.632906,0.155173,-1.314081,0.998547,-0.434001,-0.187704,-0.331801,0.016537,0.188167,0.742806
153,Sanmu,2022,-0.572632,-0.662868,-0.481456,-0.158865,0.330074,-0.060931,-0.257765,-0.185638,...,0.461920,-0.076307,-1.512841,-0.831394,-0.940632,-0.746253,-0.450828,-2.356662,-1.029052,0.713673


In [111]:
# Make sure the final table carries a plain 0..n-1 index before it is saved.
data = data.reset_index(drop=True)
data

Unnamed: 0,region,year,prep_temp_2m,prep_soil_temp_l1,prep_soil_water_vol_l1,prep_net_solar_radiation,prep_total_rain,plnt_temp_2m,plnt_soil_temp_l1,plnt_soil_water_vol_l1,...,head_soil_temp_l1,head_soil_water_vol_l1,head_net_solar_radiation,head_total_rain,hvst_temp_2m,hvst_soil_temp_l1,hvst_soil_water_vol_l1,hvst_net_solar_radiation,hvst_total_rain,yield
0,Asahi,1993,-0.060562,-0.003973,2.238262,0.959389,0.951731,-1.572648,-1.342187,1.813258,...,-1.779114,2.890841,-1.078292,0.628321,-1.455501,-1.898463,2.120006,0.689816,-0.110071,-3.160995
1,Asahi,1994,-0.805556,-0.793434,2.159424,1.180903,0.654922,0.434492,0.458058,1.678270,...,1.193804,0.613343,1.850544,-0.119432,1.678067,1.329429,1.796030,0.005523,-0.717270,0.946735
2,Asahi,1995,-0.147400,-0.194907,1.660719,0.999355,-0.923270,-0.360754,-0.193593,2.315816,...,0.409513,0.040694,1.606052,-0.084138,0.834788,0.332058,1.697519,1.255880,-0.810901,-0.539039
3,Asahi,1996,-1.168914,-1.154994,1.389486,1.119742,-1.310899,-1.849488,-1.477762,1.341087,...,-1.806703,1.487743,0.555155,0.772871,-0.915708,-1.573629,2.101935,1.114677,-0.799842,-0.247711
4,Asahi,1997,0.584949,0.549297,1.630429,1.465555,-1.039116,0.187119,0.445647,1.697100,...,-0.633121,0.932983,0.572341,-1.211379,-0.732967,-0.969184,0.823075,3.124311,-1.237997,0.509743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150,Sanmu,2019,-0.196702,-0.362883,-0.571154,-0.380016,-0.625504,0.643463,0.365435,-0.605786,...,1.094681,-0.622836,0.660059,-1.007219,1.718072,1.818263,-0.537590,-0.626865,0.896926,-0.335109
151,Sanmu,2020,0.839417,0.910809,-0.322928,-0.487355,0.332045,0.910747,0.784296,-0.376874,...,1.252433,-0.815119,0.638405,-1.570382,-1.315796,-1.135402,-0.403589,-1.552317,0.623271,0.538876
152,Sanmu,2021,1.336494,1.294814,-0.720712,0.829473,-0.974916,1.286193,1.240299,-0.754766,...,-0.632906,0.155173,-1.314081,0.998547,-0.434001,-0.187704,-0.331801,0.016537,0.188167,0.742806
153,Sanmu,2022,-0.572632,-0.662868,-0.481456,-0.158865,0.330074,-0.060931,-0.257765,-0.185638,...,0.461920,-0.076307,-1.512841,-0.831394,-0.940632,-0.746253,-0.450828,-2.356662,-1.029052,0.713673


In [112]:
# Write the final modelling table (keys, scaled climate features, scaled yield).
# The index is written as an unnamed first column.
data.to_csv('../data/processed/processed.csv')