In [1]:
import pandas as pd
import joblib

In [3]:
df = pd.read_csv(r'C:/Users/DC/OneDrive - 계명대학교/DC/2024/2024_weatherbig/데셋/electric_test.csv')

In [5]:
df.reset_index(inplace=True)

In [6]:
df.rename(columns={'NUM': '격자넘버'}, inplace=True)
df.rename(columns={'TM': '날짜'}, inplace=True)
df.rename(columns={'HH24': '시간'}, inplace=True)
df.rename(columns={'STN': '지점번호'}, inplace=True)
df.rename(columns={'nph_ta': '기온'}, inplace=True)
df.rename(columns={'nph_hm': '상대습도'}, inplace=True)
df.rename(columns={'nph_ws_10m': '풍속'}, inplace=True)
df.rename(columns={'nph_rn_60m': '강수량'}, inplace=True)
df.rename(columns={'nph_ta_chi': '체감온도'}, inplace=True)
df.rename(columns={'weekday': '요일'}, inplace=True)
df.rename(columns={'week_name': '주중주말'}, inplace=True)

In [7]:
base_temp = 18.0

# CDH와 HDH 계산 함수
def calculate_cdh(temp, base_temp):
    return max(0, temp - base_temp)

def calculate_hdh(temp, base_temp):
    return max(0, base_temp - temp)

# df 셋
df['CDH'] = df.apply(lambda x: calculate_cdh(x['기온'], base_temp), axis=1)
df['HDH'] = df.apply(lambda x: calculate_hdh(x['기온'], base_temp), axis=1)

# 불쾌지수 컬럼 추가
def calculate_discomfort_index(temperature, humidity):
    return 1.8 * temperature - 0.55 * (1 - humidity / 100) * (1.8 * temperature - 26) + 32

# df 셋
df['불쾌지수'] = df.apply(lambda x: calculate_discomfort_index(x['기온'], x['상대습도']), axis=1)


In [8]:
df = df.sort_values(by='날짜')
df['날짜'] = pd.to_datetime(df['날짜'])

In [9]:
def assign_season(date):
    if (date.month == 2) or (date.month == 3) or (date.month == 4):
        return '봄'
    elif (date.month == 5) or (date.month == 6) or (date.month == 7):
        return '여름'
    elif (date.month == 8) or (date.month == 9) or (date.month == 10):
        return '가을'
    else:
        return '겨울'

# '계절' 컬럼 생성
df['계절'] = df['날짜'].apply(assign_season)

In [10]:
df

Unnamed: 0,index,격자넘버,날짜,시간,지점번호,기온,상대습도,풍속,강수량,체감온도,요일,주중주말,elect,CDH,HDH,불쾌지수,계절
0,0,4816,2023-01-01 01:00:00,1,752,3.0,68.6,2.9,0.0,-0.1,6,1.0,,0.0,15.0,40.957620,겨울
2627998,2627998,18574,2023-01-01 01:00:00,1,876,2.1,48.3,0.5,0.0,2.1,6,1.0,,0.0,15.9,42.098257,겨울
201480,201480,18828,2023-01-01 01:00:00,1,511,0.2,54.3,1.9,0.0,-0.3,6,1.0,,0.0,17.8,38.804614,겨울
1340280,1340280,17794,2023-01-01 01:00:00,1,436,-2.5,69.6,0.5,0.0,-4.1,6,1.0,,0.0,20.5,32.599600,겨울
1489200,1489200,19136,2023-01-01 01:00:00,1,451,1.1,47.3,1.6,0.0,-2.5,6,1.0,,0.0,16.9,40.942197,겨울
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2522877,2522877,11271,2024-01-01 00:00:00,24,296,1.3,64.3,0.5,0.0,1.3,0,0.0,,0.0,16.7,38.985641,겨울
2006039,2006039,14239,2024-01-01 00:00:00,24,279,1.2,88.5,0.1,0.0,1.2,0,0.0,,0.0,16.8,35.667880,겨울
814679,814679,13619,2024-01-01 00:00:00,24,702,4.0,77.9,1.6,0.0,2.7,0,0.0,,0.0,14.0,41.485140,겨울
840959,840959,17791,2024-01-01 00:00:00,24,119,-1.2,96.0,0.0,0.0,-1.2,0,0.0,,0.0,19.2,30.459520,겨울


In [11]:
seasonal_averages = df.groupby('계절').agg({
    '기온':'mean',
    '상대습도':'mean',
    '체감온도':'mean',
    '풍속':'mean',
    '강수량':'mean'
}).reset_index()

seasonal_averages

Unnamed: 0,계절,기온,상대습도,체감온도,풍속,강수량
0,가을,22.021828,78.373901,23.030252,1.543439,0.169337
1,겨울,5.729009,63.054185,2.239713,2.061995,0.0742
2,봄,9.46631,63.172411,8.188722,1.979861,0.057899
3,여름,21.210946,81.588478,24.043474,1.71853,0.303438


In [12]:
df = df.merge(seasonal_averages, on='계절', suffixes=('', '_계절별평균'))

In [13]:
def assign_date(df, date_column):
    df['년'] = df[date_column].dt.year
    df['월'] = df[date_column].dt.month
    df['일'] = df[date_column].dt.day
    df['시간'] = df[date_column].dt.hour
    return df

test = assign_date(df, '날짜')

In [14]:
df = pd.get_dummies(test, columns=['계절'])

In [15]:
df

Unnamed: 0,index,격자넘버,날짜,시간,지점번호,기온,상대습도,풍속,강수량,체감온도,...,체감온도_계절별평균,풍속_계절별평균,강수량_계절별평균,년,월,일,계절_가을,계절_겨울,계절_봄,계절_여름
0,0,4816,2023-01-01 01:00:00,1,752,3.0,68.6,2.9,0.0,-0.1,...,2.239713,2.061995,0.0742,2023,1,1,False,True,False,False
1,2627998,18574,2023-01-01 01:00:00,1,876,2.1,48.3,0.5,0.0,2.1,...,2.239713,2.061995,0.0742,2023,1,1,False,True,False,False
2,201480,18828,2023-01-01 01:00:00,1,511,0.2,54.3,1.9,0.0,-0.3,...,2.239713,2.061995,0.0742,2023,1,1,False,True,False,False
3,1340280,17794,2023-01-01 01:00:00,1,436,-2.5,69.6,0.5,0.0,-4.1,...,2.239713,2.061995,0.0742,2023,1,1,False,True,False,False
4,1489200,19136,2023-01-01 01:00:00,1,451,1.1,47.3,1.6,0.0,-2.5,...,2.239713,2.061995,0.0742,2023,1,1,False,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2829473,2522877,11271,2024-01-01 00:00:00,0,296,1.3,64.3,0.5,0.0,1.3,...,2.239713,2.061995,0.0742,2024,1,1,False,True,False,False
2829474,2006039,14239,2024-01-01 00:00:00,0,279,1.2,88.5,0.1,0.0,1.2,...,2.239713,2.061995,0.0742,2024,1,1,False,True,False,False
2829475,814679,13619,2024-01-01 00:00:00,0,702,4.0,77.9,1.6,0.0,2.7,...,2.239713,2.061995,0.0742,2024,1,1,False,True,False,False
2829476,840959,17791,2024-01-01 00:00:00,0,119,-1.2,96.0,0.0,0.0,-1.2,...,2.239713,2.061995,0.0742,2024,1,1,False,True,False,False


In [16]:
spring_va = df[df['계절_봄']==True]
summer_va = df[df['계절_여름']==True]
autum_va = df[df['계절_가을']==True]
winter_va = df[df['계절_겨울']==True]

In [18]:
loaded_model = joblib.load(r'C:/Users/DC/OneDrive - 계명대학교/DC/2024/2024_weatherbig/데셋/model/spring_best_model.pkl')

In [19]:
spring_va

Unnamed: 0,index,격자넘버,날짜,시간,지점번호,기온,상대습도,풍속,강수량,체감온도,...,체감온도_계절별평균,풍속_계절별평균,강수량_계절별평균,년,월,일,계절_가을,계절_겨울,계절_봄,계절_여름
239989,2663781,11870,2023-02-01 00:00:00,0,939,1.0,76.9,0.0,0.0,1.0,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
239990,570143,17938,2023-02-01 00:00:00,0,545,2.3,70.2,0.3,0.0,4.3,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
239991,2024303,14091,2023-02-01 00:00:00,0,825,4.5,67.4,0.5,0.0,-2.1,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
239992,1384823,18539,2023-02-01 00:00:00,0,457,2.9,73.4,0.3,0.0,5.1,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
239993,2252061,10222,2023-02-01 00:00:00,0,294,9.4,79.3,1.4,0.0,3.0,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
929912,1343158,17794,2023-04-30 23:00:00,23,436,12.2,82.7,0.4,0.0,11.8,...,8.188722,1.979861,0.057899,2023,4,30,False,False,True,False
929913,519718,10935,2023-04-30 23:00:00,23,158,13.1,86.0,4.5,0.0,11.5,...,8.188722,1.979861,0.057899,2023,4,30,False,False,True,False
929914,29158,9884,2023-04-30 23:00:00,23,165,10.7,80.3,5.8,0.0,11.9,...,8.188722,1.979861,0.057899,2023,4,30,False,False,True,False
929915,1027798,18984,2023-04-30 23:00:00,23,424,12.9,64.9,2.9,0.0,11.8,...,8.188722,1.979861,0.057899,2023,4,30,False,False,True,False


In [20]:
predictions_new_data = loaded_model.predict(spring_va.drop(['년','날짜','index'],axis=1))

In [21]:
spring_va['elect'] = predictions_new_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [22]:
loaded_model = joblib.load(r'C:/Users/DC/OneDrive - 계명대학교/DC/2024/2024_weatherbig/데셋/model/summer_best_model.pkl')

In [23]:
predictions_new_data = loaded_model.predict(summer_va.drop(['년','날짜','index'],axis=1))

In [24]:
summer_va['elect'] = predictions_new_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [25]:
loaded_model = joblib.load(r'C:/Users/DC/OneDrive - 계명대학교/DC/2024/2024_weatherbig/데셋/model/autum_best_model.pkl')

In [26]:
predictions_new_data = loaded_model.predict(autum_va.drop(['년','날짜','index'],axis=1))

In [27]:
autum_va['elect'] = predictions_new_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [28]:
loaded_model = joblib.load(r'C:/Users/DC/OneDrive - 계명대학교/DC/2024/2024_weatherbig/데셋/model/winter_best_model.pkl')

In [29]:
predictions_new_data = loaded_model.predict(winter_va.drop(['년','날짜','index'],axis=1))

In [30]:
winter_va['elect'] = predictions_new_data

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [31]:
concat2 = pd.concat([spring_va, summer_va, autum_va, winter_va], ignore_index=True)

In [32]:
concat2

Unnamed: 0,index,격자넘버,날짜,시간,지점번호,기온,상대습도,풍속,강수량,체감온도,...,체감온도_계절별평균,풍속_계절별평균,강수량_계절별평균,년,월,일,계절_가을,계절_겨울,계절_봄,계절_여름
0,2663781,11870,2023-02-01,0,939,1.0,76.9,0.0,0.0,1.0,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
1,570143,17938,2023-02-01,0,545,2.3,70.2,0.3,0.0,4.3,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
2,2024303,14091,2023-02-01,0,825,4.5,67.4,0.5,0.0,-2.1,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
3,1384823,18539,2023-02-01,0,457,2.9,73.4,0.3,0.0,5.1,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
4,2252061,10222,2023-02-01,0,294,9.4,79.3,1.4,0.0,3.0,...,8.188722,1.979861,0.057899,2023,2,1,False,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2829473,2522877,11271,2024-01-01,0,296,1.3,64.3,0.5,0.0,1.3,...,2.239713,2.061995,0.074200,2024,1,1,False,True,False,False
2829474,2006039,14239,2024-01-01,0,279,1.2,88.5,0.1,0.0,1.2,...,2.239713,2.061995,0.074200,2024,1,1,False,True,False,False
2829475,814679,13619,2024-01-01,0,702,4.0,77.9,1.6,0.0,2.7,...,2.239713,2.061995,0.074200,2024,1,1,False,True,False,False
2829476,840959,17791,2024-01-01,0,119,-1.2,96.0,0.0,0.0,-1.2,...,2.239713,2.061995,0.074200,2024,1,1,False,True,False,False


In [33]:
data = pd.read_csv(r'C:/Users/DC/OneDrive - 계명대학교/DC/2024/2024_weatherbig/데셋/electric_test.csv')

In [34]:
data.reset_index(inplace=True)

In [35]:
data

Unnamed: 0,index,NUM,TM,HH24,STN,nph_ta,nph_hm,nph_ws_10m,nph_rn_60m,nph_ta_chi,weekday,week_name,elect
0,0,4816,2023-01-01 01:00:00,1,752,3.0,68.6,2.9,0.0,-0.1,6,1.0,
1,1,4816,2023-01-01 02:00:00,2,752,3.1,69.4,2.7,0.0,0.3,6,1.0,
2,2,4816,2023-01-01 03:00:00,3,752,3.6,68.3,2.3,0.0,1.2,6,1.0,
3,3,4816,2023-01-01 04:00:00,4,752,4.0,69.2,3.1,0.0,1.1,6,1.0,
4,4,4816,2023-01-01 05:00:00,5,752,4.2,69.5,2.5,0.0,2.0,6,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2829473,2829473,12322,2023-12-31 20:00:00,20,901,5.6,70.8,3.2,0.0,5.8,6,1.0,
2829474,2829474,12322,2023-12-31 21:00:00,21,901,5.3,69.1,3.6,0.0,4.5,6,1.0,
2829475,2829475,12322,2023-12-31 22:00:00,22,901,5.1,70.6,3.0,0.0,5.1,6,1.0,
2829476,2829476,12322,2023-12-31 23:00:00,23,901,5.2,69.7,3.8,0.0,5.1,6,1.0,


In [36]:
data['TM'] = pd.to_datetime(data['TM'])

In [37]:
concat2.drop(['CDH','HDH','불쾌지수','기온_계절별평균','상대습도_계절별평균','체감온도_계절별평균','풍속_계절별평균','강수량_계절별평균','계절_가을','계절_겨울','계절_여름','계절_봄'],axis=1, inplace=True)

In [38]:
concat2.rename(columns={'격자넘버': 'NUM'}, inplace=True)
concat2.rename(columns={'날짜': 'TM'}, inplace=True)
concat2.rename(columns={'시간': 'HH24'}, inplace=True)
concat2.rename(columns={'지점번호': 'STN'}, inplace=True)
concat2.rename(columns={'기온': 'nph_ta'}, inplace=True)
concat2.rename(columns={'상대습도': 'nph_hm'}, inplace=True)
concat2.rename(columns={'풍속': 'nph_ws_10m'}, inplace=True)
concat2.rename(columns={'강수량': 'nph_rn_60m'}, inplace=True)
concat2.rename(columns={'체감온도': 'nph_ta_chi'}, inplace=True)
concat2.rename(columns={'요일': 'weekday'}, inplace=True)
concat2.rename(columns={'주중주말': 'week_name'}, inplace=True)


In [39]:
concat = concat2[['index','elect']]

In [40]:
a = ['index']
merge_outer = pd.merge(data,concat, how='inner',on=a)

In [41]:
merge_outer

Unnamed: 0,index,NUM,TM,HH24,STN,nph_ta,nph_hm,nph_ws_10m,nph_rn_60m,nph_ta_chi,weekday,week_name,elect_x,elect_y
0,0,4816,2023-01-01 01:00:00,1,752,3.0,68.6,2.9,0.0,-0.1,6,1.0,,94.689493
1,1,4816,2023-01-01 02:00:00,2,752,3.1,69.4,2.7,0.0,0.3,6,1.0,,84.025079
2,2,4816,2023-01-01 03:00:00,3,752,3.6,68.3,2.3,0.0,1.2,6,1.0,,75.544979
3,3,4816,2023-01-01 04:00:00,4,752,4.0,69.2,3.1,0.0,1.1,6,1.0,,70.451624
4,4,4816,2023-01-01 05:00:00,5,752,4.2,69.5,2.5,0.0,2.0,6,1.0,,67.880969
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2829473,2829473,12322,2023-12-31 20:00:00,20,901,5.6,70.8,3.2,0.0,5.8,6,1.0,,139.358039
2829474,2829474,12322,2023-12-31 21:00:00,21,901,5.3,69.1,3.6,0.0,4.5,6,1.0,,136.255563
2829475,2829475,12322,2023-12-31 22:00:00,22,901,5.1,70.6,3.0,0.0,5.1,6,1.0,,132.128986
2829476,2829476,12322,2023-12-31 23:00:00,23,901,5.2,69.7,3.8,0.0,5.1,6,1.0,,120.024742


In [42]:
merge_outer.rename(columns={'elect_y': 'elect'}, inplace=True)

In [43]:
merge_outer.drop('elect_x',axis=1,inplace=True)

In [44]:
merge_outer.drop('index',axis=1,inplace=True)

In [46]:
merge_outer.to_csv(r'C:/Users/DC/OneDrive - 계명대학교/DC/2024/2024_weatherbig/데셋/240223.csv')

In [47]:
merge_outer

Unnamed: 0,NUM,TM,HH24,STN,nph_ta,nph_hm,nph_ws_10m,nph_rn_60m,nph_ta_chi,weekday,week_name,elect
0,4816,2023-01-01 01:00:00,1,752,3.0,68.6,2.9,0.0,-0.1,6,1.0,94.689493
1,4816,2023-01-01 02:00:00,2,752,3.1,69.4,2.7,0.0,0.3,6,1.0,84.025079
2,4816,2023-01-01 03:00:00,3,752,3.6,68.3,2.3,0.0,1.2,6,1.0,75.544979
3,4816,2023-01-01 04:00:00,4,752,4.0,69.2,3.1,0.0,1.1,6,1.0,70.451624
4,4816,2023-01-01 05:00:00,5,752,4.2,69.5,2.5,0.0,2.0,6,1.0,67.880969
...,...,...,...,...,...,...,...,...,...,...,...,...
2829473,12322,2023-12-31 20:00:00,20,901,5.6,70.8,3.2,0.0,5.8,6,1.0,139.358039
2829474,12322,2023-12-31 21:00:00,21,901,5.3,69.1,3.6,0.0,4.5,6,1.0,136.255563
2829475,12322,2023-12-31 22:00:00,22,901,5.1,70.6,3.0,0.0,5.1,6,1.0,132.128986
2829476,12322,2023-12-31 23:00:00,23,901,5.2,69.7,3.8,0.0,5.1,6,1.0,120.024742
