In [5]:
# 将预报数据和S2S都重组成LSTM模型需要的每年的数据，并输出到04_mergeData，包含两个文件夹
# 01_ECMWF;02_histdata[mean和similar]

def process_climate_data(data_new, year, T_upper, T_lower, dynamic_features):
    """Derive climate indices for one year and join them to the aggregated features.

    Parameters
    ----------
    data_new : pandas.DataFrame
        Table whose column names contain the tags ``_Tmin``, ``_Tmean``,
        ``_Tmax`` and ``_Pre``. A ``year`` column is written into this frame
        in place.
    year : int
        Calendar year; the date axis starts on ``<year>-01-01`` and has one
        entry per ``_Pre`` column.
    T_upper, T_lower : number
        Thresholds forwarded unchanged to ``extreme_temperature``.
    dynamic_features : list of str
        Feature tags forwarded unchanged to ``aggre_8days``.

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation, in this order, of the three frames returned
        by ``extreme_temperature`` (CDD, HDD, GDD), the ``spei`` result and the
        ``aggre_8days`` result.
    """
    def _matrix_for(tag):
        # Every column whose name contains `tag`, as a 2-D value array.
        picked = [name for name in data_new.columns if tag in name]
        return data_new[picked].values

    Tmin = _matrix_for('_Tmin')
    Tmean = _matrix_for('_Tmean')
    Tmax = _matrix_for('_Tmax')
    Pre = _matrix_for('_Pre')

    # One date per precipitation column, counted from 1 January of `year`.
    dates = pd.date_range(start=f'{year}-01-01', periods=Pre.shape[1], freq='D')

    # Tag the input rows with the year before they are aggregated.
    data_new['year'] = year

    # Derived indices (helpers come from the project's `functions` module).
    spei_df = spei(dates, Pre, Tmean)
    CDD_df, HDD_df, GDD_df = extreme_temperature(dates, Tmax, Tmin, T_upper, T_lower)

    # Aggregated features (8-day periods, judging by the helper's name).
    aggregated = aggre_8days(dynamic_features, dates, data_new)

    return pd.concat([CDD_df, HDD_df, GDD_df, spei_df, aggregated], axis=1)
    
def find_weeks(forecastDataList, week_dates):
    """Map each forecast date to the 1-based number of the week containing it.

    Week ``k`` spans ``week_dates[k-1] <= date < week_dates[k]``. The last
    week has no upper bound: any date at or after ``week_dates[-1]`` is
    assigned week ``len(week_dates)``. Dates that fall in no interval
    (e.g. before the first boundary) are left out of the result.

    Returns
    -------
    dict
        ``{date: week_number}`` in order of first appearance in
        ``forecastDataList``.
    """
    n_weeks = len(week_dates)
    week_of = {}
    for day in forecastDataList:
        # First half-open interval [start, next_start) that contains the date.
        hit = next(
            (pos + 1 for pos in range(n_weeks - 1)
             if week_dates[pos] <= day < week_dates[pos + 1]),
            None,
        )
        # No interval matched: the date may still belong to the open-ended last week.
        if hit is None and day >= week_dates[-1]:
            hit = n_weeks
        if hit is not None:
            week_of[day] = hit
    return week_of

def update_S2Sandhist_VI(data_S2S_new_all_new, VI_select2, result, years, start_point, harvest_point, outpath_S2S,ii,type):
    """Fill each year's forecast-period VI columns from its most similar earlier year.

    For every year in ``years`` the yearly mean of the already-observed VI
    weeks (``start_point`` up to, but excluding, the forecast week) is compared
    by DTW distance with the same series of every *earlier* year in ``years``.
    The forecast weeks (forecast week .. ``harvest_point``) of the closest
    earlier year then replace the year's own values. A year with no earlier
    candidate (the earliest one) keeps its own values; previously this case
    raised ``ValueError`` from ``min()`` on an empty dict, so the function
    could not complete for any non-empty ``years``.

    Parameters
    ----------
    data_S2S_new_all_new : pandas.DataFrame
        Week-wise table; modified in place and returned. ``reset_index()``
        must expose ``year`` and ``idJoin`` columns, i.e. the frame is
        expected to be indexed by those two levels.
    VI_select2 : str
        Column-name suffix of the vegetation index (columns are named
        ``Week<k><VI_select2>``).
    result : mapping
        ``result[ii]`` is the first week to be forecast.
    years : iterable of int
        Years to update. Rows of ``data_S2S_new_all_new`` are assumed to be
        ordered year by year in this same order, because values are written
        back positionally.
    start_point, harvest_point : int
        First and last week of the modelled season.
    outpath_S2S : str
        Directory that receives ``data_<type>.csv``.
    ii : hashable
        Key into ``result``.
    type : str
        Suffix for the output file name.

    Returns
    -------
    pandas.DataFrame
        ``data_S2S_new_all_new`` with the forecast VI columns replaced.
    """
    # VI columns only; reset_index() turns the index levels into plain columns.
    filtered_columns = [col for col in data_S2S_new_all_new.columns if VI_select2 in col]
    data_S2S_VI = data_S2S_new_all_new[filtered_columns].reset_index()

    # The week lists depend only on the arguments, so build them once.
    week_forecast = result[ii]
    forecast_weeklist = [f'Week{week}{VI_select2}' for week in range(week_forecast, harvest_point + 1)]
    before_weeklist = [f'Week{week}{VI_select2}' for week in range(start_point, week_forecast)]

    # Per-year mean of the observed part of the season (one row per year).
    data_S2S_VI_before = data_S2S_VI[before_weeklist + ['year']].groupby('year').mean()

    updated_frames = []
    for year in years:
        # .copy() so the assignment below writes to an independent frame.
        current_S2S_VI = data_S2S_VI[data_S2S_VI['year'] == year].copy()
        current_S2S_VI_before = data_S2S_VI_before.loc[year]

        # DTW distance to every earlier year (only the past may be used).
        dtw_distances = {}
        for year1 in years:
            if year1 < year:
                distance, _path = fastdtw(current_S2S_VI_before, data_S2S_VI_before.loc[year1])
                dtw_distances[year1] = distance

        if dtw_distances:
            most_similar_by_dtw = min(dtw_distances, key=dtw_distances.get)
            dataVI_similaryear = data_S2S_VI[data_S2S_VI['year'] == most_similar_by_dtw]
            # Positional copy: assumes both years list the same units in the
            # same row order (a differing row count raises ValueError).
            current_S2S_VI[forecast_weeklist] = dataVI_similaryear[forecast_weeklist].values
        # else: no earlier year to borrow from -> keep the year's own values.

        # 'year' and 'idJoin' are left as they are: the old assignment of the
        # similar year's idJoin aligned on row labels and produced NaN.
        updated_frames.append(current_S2S_VI)

    if updated_frames:
        # Single concat instead of growing a DataFrame inside the loop.
        update_VI = pd.concat(updated_frames, axis=0)
        data_S2S_new_all_new[forecast_weeklist] = update_VI[forecast_weeklist].values

    # Save the result.
    output_path = os.path.join(outpath_S2S, 'data_'+type+'.csv')
    data_S2S_new_all_new.to_csv(output_path)
    print(f"Updated data saved to {output_path}")

    return data_S2S_new_all_new

In [6]:
# Imports and run configuration. Every name defined in this cell is notebook-global
# state that the processing cells below read.
import os
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import sys
import os
# First three characters of the working directory (e.g. 'F:\' for the run whose
# output is shown below); used as the root of all project paths.
root_directory = os.getcwd()[0:3]
# Make the project code directory importable, plus several machine-specific
# site-packages directories (hard-coded absolute paths).
sys.path.append(root_directory+'\\SCI\\SCI9_1\\01_code')
sys.path.append(r'C:\ProgramData\anaconda3\Lib\site-packages') 
sys.path.append(r'C:\Users\DELL\.conda\envs\myenv\Lib\site-packages') 
sys.path.append(r'C:\Users\DELL\.conda\envs\rasterio_env\Lib\site-packages') 
from functions import spei,extreme_temperature,aggre_8days,extract_dates
from sklearn.metrics import mean_absolute_percentage_error, accuracy_score, roc_auc_score, roc_curve,r2_score,mean_squared_error
from functions import calculate_rrmse1,calculate_rrmse2,calculate_acc,calculate_nrmse,calculate_mare,extract_selected_variables
from fastdtw import fastdtw


# Feature groups, identified by column-name suffix (dynamic) or full column name (static).
VIs =  ['_KNDVI' ,'_EVI','_NDVI']
Cilmate = ['_Pre' ,'_Tmin' ,'_Solar','_Tmean','_Tmax']
Climate_Exogenous  = ['_CDD' ,'_HDD' ,'_GDD','_VPD','_wind_speed','_SPEI'] #'_VPD','_wind_speed',
soil_feature = [ 'SAND','AWC', 'SILT','ORG_CARBON',  'TOTAL_N', 'PH_WATER',  'CEC_SOIL', 'CLAY']
loc_feature = ['elevation', 'lat', 'lon']
Year_feature = ['year'];union_feature = ['idJoin'];
# Suffixes handed to aggre_8days through process_climate_data.
dynamic_features = [ '_KNDVI' ,'_EVI','_NDVI','_Pre' ,'_Tmin' ,'_Solar','_Tmean','_VPD', '_wind_speed' ,'_Tmax']

import warnings
warnings.filterwarnings("ignore")
import ast
from fastdtw import fastdtw
 
# Current working directory
current_directory = os.getcwd()
print("当前工作目录:", current_directory)
 
# Name of the current folder
current_folder_name = os.path.basename(current_directory)
print("当前文件夹名字:", current_folder_name)
 
# Name of the parent folder
parent_directory = os.path.dirname(current_directory)
parent_folder_name = os.path.basename(parent_directory)
print("上一级文件夹名字:", parent_folder_name)

# The folder layout encodes the run: <crop>/<countryID>, e.g. 02_Wheat/06_India.
crop = parent_folder_name;countryID =current_folder_name
# Variables that need to be changed per run
country = countryID.split('_')[1]
############## Region / area #############################################
# information.txt is read as a space-separated, header-less table; the first row
# supplies the first year, the last year and a shapefile name.
inpath_dates_other = root_directory + '\\SCI\\SCI9_1\\02_data\\'+crop+'\\'+countryID+'\\'+'01_data'+'\\'+'07_Information'
other_infornamtion = pd.read_csv(os.path.join(inpath_dates_other,'information.txt'), sep=' ', header=None)
startyear,endyear,shp_name = other_infornamtion.iloc[0,0],other_infornamtion.iloc[0,1],other_infornamtion.iloc[0,2]

# NOTE: inputpath_base is assigned again further down (with a trailing separator).
inputpath_base = root_directory + '\\SCI\\SCI9_1\\02_data\\'+crop+'\\'+countryID


# The forecast target is the last year listed in information.txt.
Forecastyear = endyear


years = range(startyear,endyear+1)
# Region identifiers processed by the loops below, and the forecast year of each.
regions = ['I']#
Forecastyears = {
    'I': endyear, 
}
# Temperature thresholds by crop (passed on to extreme_temperature)
if crop == '02_Wheat':
    T_upper = 34
    T_lower = 0
elif crop == '01_Maize':  # spelling corrected
    T_upper = 30
    T_lower = 8
elif crop == '03_Rice':
    T_upper = 35
    T_lower = 8    
else:
    # Fallback thresholds for any other crop folder name.
    T_upper = 30
    T_lower = 10



# Base data directory for this crop/country; later cells join sub-paths onto it.
inputpath_base = root_directory + '\\SCI\\SCI9_1\\02_data\\'+crop+'\\'+countryID+'\\'
institution = 'ECMWF';ECMWF_path = os.path.join(inputpath_base,'02_S2S')

# Read CommonYear_Week.txt into `lines`, one whitespace-stripped string per line.
file_path = os.path.join(inputpath_base, '02_S2S', '01_dataori', 'ECMWF','CommonYear_Week.txt')
with open(file_path, 'r') as file:
    lines = [line.strip() for line in file.readlines()]
    

当前工作目录: F:\SCI\SCI9_1\01_code\02_Wheat\06_India
当前文件夹名字: 06_India
上一级文件夹名字: 02_Wheat


In [7]:
# Display the first study year parsed from information.txt.
startyear

2001

In [8]:
'''
【20250101】
# 1、# 历史数据;将其表头修改掉
【20250106】
# 1、重新修改了start_point<harvest_point的存在一小部分问题，主要是before数据替换的不太正确
# 2、添加了start_point>harvest_point，即跨年的情况
'''


# Rewrite each downloaded historical-year file so that it carries the forecast
# year's column prefix, and store it as hist_<year>.csv under 02_histdata/<region>.
for region in regions:
    Forecastyear = Forecastyears[region]
    hist_outputpath = os.path.join(inputpath_base,'02_S2S','03_outputData','02_histdata',region)
    os.makedirs(hist_outputpath,exist_ok=True)
    # NOTE(review): the file prefix is hard-coded to 'Wheat_' and does not follow `crop` — confirm.
    pre_name = 'Wheat_'+region+'_';
    hist_inputpath = os.path.join(inputpath_base,'01_data','04_GEEdownloadData','02_histdata')
    # The 1990 file, with the leading '1990' stripped from its column names,
    # defines the reference column set and order for every other year.
    data = pd.read_csv(os.path.join(hist_inputpath,pre_name+str(1990)+'.csv'));
    data.columns = data.columns.str.replace(rf'^{1990}', '', regex=True)
    columns_sta = data.columns
    # 31 years ending the year before the forecast year.
    hist_start_year = Forecastyear-31;hist_end_year = Forecastyear-1;
    # Initialised here but not used anywhere in this cell.
    allhist = pd.DataFrame()
    
    for year_hist in range(hist_start_year,hist_end_year+1):
        data = pd.read_csv( os.path.join(hist_inputpath,pre_name+str(year_hist)+'.csv'));
        # Strip the leading year from the column names, then select/reorder to the reference columns.
        data.columns = data.columns.str.replace(rf'^{year_hist}', '', regex=True)
        data = data[columns_sta]
        # Creating 'idGroup' first guarantees the column exists for the drop below.
        data['idGroup'] = data['idJoin']
        data.drop(['idGroup', 'iso3', '.geo','system:index'], axis=1, inplace=True)#'idGroup', 
        # data.drop(['idGroup', 'iso3', 'lat', 'lon', '.geo','system:index'], axis=1, inplace=True)#'idGroup', 
        # Prefix every column with the forecast year, then restore the plain 'idJoin' key name.
        data.columns = str(Forecastyear) + data.columns
        data.rename(columns={f"{Forecastyear}idJoin": "idJoin"}, inplace=True)
        data.to_csv(os.path.join(hist_outputpath,'hist_'+str(year_hist)+'.csv'),index=False)

In [5]:
# 输出一下


In [9]:
'''
【20250106】
# 1、重新修改了start_point<harvest_point的存在一小部分问题，主要是before数据替换的不太正确
# 2、添加了start_point>harvest_point，即跨年的情况

【20250107】
# 1、重新修正了跨年的问题，跨年，前面的产量（开始到16周）应该是去年的，而非当年的
'''

'''
【20250106】
# 1、重新修改了start_point<harvest_point的存在一小部分问题，主要是before数据替换的不太正确
# 2、添加了start_point>harvest_point，即跨年的情况

【20250107】
# 1、重新修正了跨年的问题，跨年，前面的产量（开始到16周）应该是去年的，而非当年的


【20250316】

修正历史数据的错误，按照巴基斯坦修正后的代码 目前 美国 巴基斯坦 欧洲 阿根廷 澳大利亚 加拿大 印度已经修正 双区域俄罗斯和美国

'''


# For every region, historical year and lead week, build a model-input table in which
# (a) the not-yet-observed VI weeks come from the most DTW-similar past year and
# (b) the not-yet-observed climate weeks come from that historical year's climate.
for region in regions:
    # ---- Per-region configuration -------------------------------------------------
    Forecastyear = Forecastyears[region]
    SelFeature_infornamtion = extract_selected_variables(inputpath_base)
    TimeFeatures_sel, Static_sel, regionID = SelFeature_infornamtion[SelFeature_infornamtion['regionID'] == region].iloc[0]

    # Growing-season weeks used for modelling (whitespace-separated, no header).
    # sep=r'\s+' replaces the deprecated delim_whitespace=True.
    inpath_dates = os.path.join(inputpath_base, '01_data','05_buildmodel', '02_extractdates','gs_three_periods.txt')
    gs_infornamtion = pd.read_csv(inpath_dates, sep=r'\s+', header=None)
    gs_infornamtion.columns = ['start_point', 'peak', 'harvest_point', 'VI_select2','regionID']
    start_point, peak, harvest_point, VI_select2, region = gs_infornamtion[gs_infornamtion['regionID'] == region].iloc[0]
    print(harvest_point)

    # ---- Weekly modelling table, restricted to the selected variables ---------------
    data_ori_all = pd.read_csv(os.path.join(inputpath_base, '01_data','05_buildmodel','01_weekdata',region+'_allweekYielddata_VIs.csv'))
    data_ori_all = data_ori_all.drop_duplicates(subset=['year', 'idJoin'],keep='last')
    Static_sel = [col for col in Static_sel if 'year.1' not in col]
    TimeFeatures_sel_all = [col for col in data_ori_all.columns if any(feature in col for feature in TimeFeatures_sel)]
    # Per the original author's note: Previous_Yield would otherwise be matched
    # through the precipitation feature name, so it is excluded explicitly.
    TimeFeatures_sel_all = [col for col in TimeFeatures_sel_all if 'Previous_Yield' not in col]
    filtered_columns_all = TimeFeatures_sel_all+Static_sel
    data_ori_all = data_ori_all[filtered_columns_all+['idJoin','Yield']]

    # VI columns, and their per-year means used for the similarity search.
    filtered_columns_VI = [col for col in data_ori_all.columns if VI_select2 in col]
    data_S2S_VI = data_ori_all[filtered_columns_VI + ['year','idJoin']]
    data_S2S_VI_mean = data_S2S_VI[filtered_columns_VI + ['year']].groupby('year').mean()

    # Selected time features other than the vegetation index (i.e. the climate ones).
    climate_features = [feature for feature in TimeFeatures_sel if feature != VI_select2[1:]]

    # Lead weeks to generate. A season that crosses the year boundary
    # (start_point >= harvest_point) is 46 weeks longer than the plain difference.
    if start_point < harvest_point:
        hisWeekList = ['leadweek_'+str(week) for week in range(1,harvest_point-start_point+1)]
    else:
        hisWeekList = ['leadweek_'+str(week) for week in range(1,harvest_point-start_point+1+46)]

    hist_inputpath = os.path.join(inputpath_base,'02_S2S','03_outputData','02_histdata',region)
    data_ori_current = data_ori_all[data_ori_all['year']==Forecastyear]
    hist_start_year = Forecastyear-30;hist_end_year = Forecastyear-1;

    for year_hist in range(hist_start_year,hist_end_year+1):
        # ---- Climate of this historical year, relabelled as the forecast year ---------
        data_his_new_ori = pd.read_csv(os.path.join(hist_inputpath,'hist_'+str(year_hist)+'.csv'))
        data_his_new_ori = data_his_new_ori.drop_duplicates(subset=['idJoin'],keep='last')
        data_his_new_ori.set_index('idJoin', inplace=True)
        data_his_new_ori['year'] = Forecastyear
        data_his_new = data_his_new_ori.copy()
        data_his_new = process_climate_data(data_his_new.reset_index(), Forecastyear, T_upper, T_lower, dynamic_features)
        # process_climate_data brings in every vegetation-index column; drop the all-NaN ones.
        data_his_new = data_his_new.dropna(how='all',axis=1)
        hist_outputpath1 = os.path.join(inputpath_base,'02_S2S','05_WeekData','02_hist',region)
        os.makedirs(hist_outputpath1,exist_ok=True)
        # Restore idJoin as the index (positional: relies on row order being preserved).
        data_his_new.index=data_his_new_ori.index
        data_his_new.to_csv(os.path.join(hist_outputpath1,'hist_'+str(year_hist)+'.csv'))

        # Working table: starts from the forecast year's rows and is carried over
        # from one lead week to the next inside the loop below.
        data_his_new_update = data_ori_current.copy()
        data_his_new['year'] = Forecastyear

        # Previous historical year's climate; loaded on first use and reused for
        # all lead weeks of this year_hist instead of being recomputed each time.
        data_his_new_lastyear = None

        for ii in hisWeekList:
            # 'leadweek_<n>' -> n; the forecast starts n weeks before harvest.
            lead = int(ii[9:])
            week_forecast = harvest_point+1-lead
            if week_forecast <= 0:
                # The forecast starts in the previous calendar year (46 weeks per year).
                week_forecast = week_forecast+46

            ############## VI: fill the forecast weeks from the most similar year ##############
            if start_point < harvest_point:
                # Sowing and harvest fall in the same calendar year.
                forecast_weeklist1 = range(week_forecast, harvest_point + 1)
                V1 = [f'Week{week}{VI_select2}' for week in range(1, week_forecast)]  # observed VI weeks
                V2 = [f'Week{week}{VI_select2}' for week in forecast_weeklist1]       # VI weeks to be forecast

                current_S2S_VI_before = data_S2S_VI_mean.loc[Forecastyear][V1]
                dtw_distances = {}
                for year1 in range(startyear,Forecastyear):  # startyear .. Forecastyear-1
                    other_S2S_VI_before = data_S2S_VI_mean.loc[year1][V1]
                    distance, path = fastdtw(current_S2S_VI_before, other_S2S_VI_before)
                    dtw_distances[year1] = distance
                most_similar_by_dtw = min(dtw_distances, key=dtw_distances.get)
                data_S2S_VI_forecast2 = data_S2S_VI[data_S2S_VI['year'] == most_similar_by_dtw][V2+['idJoin']]
                # Replace only the forecast weeks; observed weeks are kept.
                data_his_new_update = data_his_new_update.drop(V2,axis=1)
                data_his_new_update = data_his_new_update.merge(data_S2S_VI_forecast2,on='idJoin',how='inner')
            else:
                # The season crosses the year boundary.
                if week_forecast<=harvest_point:
                    # Forecast starts in the harvest year: compare a two-part series
                    # (all 46 weeks of one year plus weeks 1..week_forecast-1 of the year before).
                    forecast_weeklist1 = range(week_forecast, harvest_point+1)
                    V1_1 = [f'Week{week}{VI_select2}' for week in range(1, 46+1)]
                    V1_2 = [f'Week{week}{VI_select2}' for week in range(1, week_forecast)]
                    V2 = [f'Week{week}{VI_select2}' for week in forecast_weeklist1]
                    current_S2S_VI_before = pd.concat([data_S2S_VI_mean.loc[Forecastyear][V1_1], data_S2S_VI_mean.loc[Forecastyear-1][V1_2]])
                    dtw_distances = {}

                    for year1 in range(startyear+1,Forecastyear):  # needs year1-1, stops before Forecastyear
                        other_S2S_VI_before = pd.concat([data_S2S_VI_mean.loc[year1][V1_1], data_S2S_VI_mean.loc[year1-1][V1_2]])
                        distance, path = fastdtw(current_S2S_VI_before, other_S2S_VI_before)
                        dtw_distances[year1] = distance

                    most_similar_by_dtw = min(dtw_distances, key=dtw_distances.get)

                    data_S2S_VI_forecast2 = data_S2S_VI[data_S2S_VI['year'] == most_similar_by_dtw][V2+['idJoin']]
                    data_his_new_update = data_his_new_update.drop(V2,axis=1)
                    data_his_new_update = data_his_new_update.merge(data_S2S_VI_forecast2,on='idJoin',how='inner')

                else:
                    # Forecast starts in the previous year: weeks week_forecast..46 of that
                    # year and weeks 1..harvest_point of the harvest year are replaced.
                    forecast_weeklist1 = list(range(week_forecast, 46+1))+list(range(1,harvest_point + 1))

                    V1_1 = [f'Week{week}{VI_select2}' for week in range(1, week_forecast)]       # observed, previous year
                    V2_1 = [f'Week{week}{VI_select2}' for week in range(week_forecast, 46+1)]    # forecast, previous year
                    V2_2 = [f'Week{week}{VI_select2}' for week in range(1,harvest_point + 1)]    # forecast, harvest year

                    current_S2S_VI_before = data_S2S_VI_mean.loc[Forecastyear-1][V1_1]
                    dtw_distances = {}
                    # NOTE(review): distances are stored under year1 but computed from
                    # year1-1, while the replacement below reads year == most_similar_by_dtw
                    # and most_similar_by_dtw+1 — confirm the intended year offset.
                    for year1 in range(startyear+1,Forecastyear-1):
                        other_S2S_VI_before = data_S2S_VI_mean.loc[year1-1][V1_1]
                        distance, path = fastdtw(current_S2S_VI_before, other_S2S_VI_before)
                        dtw_distances[year1] = distance
                    most_similar_by_dtw = min(dtw_distances, key=dtw_distances.get)

                    data_S2S_VI_forecast1 = data_S2S_VI[data_S2S_VI['year'] == most_similar_by_dtw][V2_1+['idJoin']]
                    data_S2S_VI_forecast2 = data_S2S_VI[data_S2S_VI['year'] == most_similar_by_dtw+1][V2_2]
                    # Side-by-side join by row position (assumes both years list the same units in the same order).
                    data_S2S_VI_forecast2 = pd.concat([data_S2S_VI_forecast1.reset_index(drop=True), data_S2S_VI_forecast2.reset_index(drop=True)], axis=1)
                    data_his_new_update = data_his_new_update.drop(V2_1+V2_2,axis=1)
                    data_his_new_update = data_his_new_update.merge(data_S2S_VI_forecast2,on='idJoin',how='inner')
            data_his_new_update.set_index('idJoin', inplace=True)

            ############## Climate: replace the forecast weeks with historical climate ##############
            # Both frames are indexed by idJoin here, so the assignments align by unit.
            if week_forecast<=harvest_point:
                # Forecast lies entirely in the harvest year: only this historical year is needed.
                update_climate = []
                for feature in climate_features:
                    update_climate += [f'Week{week}_{feature}' for week in forecast_weeklist1]

                data_his_new_update[update_climate] = data_his_new[update_climate]
            else:
                # Forecast starts in the previous year: weeks week_forecast..46 come from
                # the historical year before year_hist.
                if data_his_new_lastyear is None:
                    data_his_new_ori_lastyear = pd.read_csv(os.path.join(hist_inputpath,'hist_'+str(year_hist-1)+'.csv'))
                    # Same preparation as for the current historical year above.
                    data_his_new_ori_lastyear = data_his_new_ori_lastyear.drop_duplicates(subset=['idJoin'],keep='last')
                    data_his_new_ori_lastyear.set_index('idJoin', inplace=True)
                    data_his_new_ori_lastyear['year'] = Forecastyear
                    # Fixed: the original called the undefined name process_calimate_data.
                    data_his_new_lastyear = process_climate_data(data_his_new_ori_lastyear.reset_index(), Forecastyear, T_upper, T_lower, dynamic_features)
                    data_his_new_lastyear = data_his_new_lastyear.dropna(how='all',axis=1)
                    # Index by idJoin so the assignment below aligns with data_his_new_update.
                    data_his_new_lastyear.index = data_his_new_ori_lastyear.index

                # Previous year: week_forecast..46 inclusive. Fixed: the upper bound was 46,
                # which skipped week 46 although the VI replacement above covers it.
                update_climate1 = []
                for feature in climate_features:
                    update_climate1 += [f'Week{week}_{feature}' for week in range(week_forecast, 46+1)]
                data_his_new_update[update_climate1] = data_his_new_lastyear[update_climate1]

                # Harvest year: weeks 1..harvest_point.
                update_climate2 = []
                for feature in climate_features:
                    update_climate2 += [f'Week{week}_{feature}' for week in range(1, harvest_point + 1)]
                data_his_new_update[update_climate2] = data_his_new[update_climate2]

            ############## Keep only the growing-season variables ##############
            data_his_new_update = data_his_new_update.reset_index()
            weeks = []
            if start_point < harvest_point:  # season within one calendar year
                for feature in TimeFeatures_sel:
                    weeks += [f'Week{week}_{feature}' for week in range(start_point, harvest_point + 1)]
                gs_features = weeks + Static_sel+['Yield']+['idJoin']
                data_his_new_update = data_his_new_update[gs_features]
            else:  # season crosses the year boundary
                # Weeks start_point..46 belong to the previous calendar year; per the
                # original author's note, <region>_data_ori.csv holds them correctly.
                # NOTE(review): this also overwrites any week_forecast..46 values
                # replaced above — confirm that this is intended.
                data = pd.read_csv(os.path.join(inputpath_base, '01_data','05_buildmodel','03_modeldata',region+'_data_ori.csv'))
                data = data.drop_duplicates(subset=['year', 'idJoin'],keep='last')
                for feature in TimeFeatures_sel:
                    weeks += [f'Week{week}_{feature}' for week in range(start_point, 47)]
                data = data[weeks+['idJoin','year']]
                data_his_new_update = data_his_new_update.drop(weeks,axis=1)
                data = data[data['year']==Forecastyear]
                data_his_new_update = data_his_new_update.merge(data,on=['idJoin','year'],how='inner')

            ############## Output ##############
            hist_outputpath = os.path.join(inputpath_base,'02_S2S','06_buildmodel','02_hist','VI_Like',region,ii)
            os.makedirs(hist_outputpath,exist_ok=True)
            data_his_new_update.to_csv(os.path.join(hist_outputpath,'hist_'+str(year_hist)+'.csv'),index=False)

19


In [None]:
      

# 找到相似年份

In [7]:


# data_S2S_new_all_new['year'] == current_S2S_VI['year']