### 강수량 예측 프로그램-확률과 통계 탐구

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn.model_selection
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

In [None]:
# Read the dataset from the CSV path below and echo its first five rows
# (the default for head()) as a quick sanity check of the load.
df = pd.read_csv('/content/data_jeju.csv')
print(df.head())

In [None]:
# DataFrame.info() writes its summary (column names, non-null counts, dtypes)
# straight to stdout and returns None, so wrapping it in print() would only
# add a stray "None" line after the summary.
df.info()

In [None]:
# Drop the columns that are excluded from the analysis: the reference
# year-month ('기준 연월'), the region label ('지역 구분'), the fresh-snowfall
# depth ('최심신적설(cm)') and the mean cloud amount ('평균운량(1/10)').
df_cleaned = df.drop(columns=['기준 연월', '지역 구분', '최심신적설(cm)', '평균운량(1/10)'])

# Give the seven remaining columns short English names.
# NOTE(review): the rename is positional, so it assumes the CSV column order
# matches this list exactly — confirm against the file header.
df_cleaned.columns = ['temperature','rainfall','moisture','sealevelpressure','dewpoint','daylight(hr)','windspeed']

# Show the first rows of the cleaned frame as the cell output.
df_cleaned.head()

In [None]:
# Open a 10x8 inch figure, compute the pairwise correlation matrix of every
# column in the cleaned frame, and draw it as an annotated heatmap.
plt.figure(figsize=(10, 8))
corrleation = df_cleaned.corr()
sns.heatmap(
    data=corrleation,
    cmap='coolwarm',
    annot=True,  # write each coefficient inside its cell
)
plt.show()

In [None]:
# Target: the rainfall column. Features: every other column of the cleaned frame.
y = df_cleaned['rainfall']
x = df_cleaned.drop(columns='rainfall')

In [None]:
# Hold out 30% of the rows as the test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.preprocessing import StandardScaler

# Feature standardization: the scaler is fitted on the training split only,
# and that same fitted transform is applied to both splits.
scaler = StandardScaler().fit(x_train)
x_train_scaled = scaler.transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Target standardization with a separate scaler, again fitted on the training
# split only. The 1-D target values are reshaped into a single column because
# the scaler is given 2-D input.
y_scaler = StandardScaler().fit(y_train.values.reshape(-1, 1))
y_train_scaled = y_scaler.transform(y_train.values.reshape(-1, 1))
y_test_scaled = y_scaler.transform(y_test.values.reshape(-1, 1))

In [None]:
 # Fully connected regression network: 200 -> 160 -> 100 -> 1 units, with the
 # input width taken from the number of feature columns in x.
 # NOTE(review): the 160-unit hidden layer uses a linear activation while its
 # neighbours use ReLU — confirm this is intentional.
 model = Sequential([
     Dense(200, input_dim=x.shape[1], activation='relu'),
     Dense(160, activation='linear'),
     Dense(100, activation='relu'),
     Dense(1, activation='linear'),
 ])

 # Optimize mean squared error with Adam and also report mean absolute error.
 model.compile(optimizer='adam', loss='mse', metrics=['mae'])

 # Train on the standardized data; 20% of the training rows are set aside
 # for validation during fitting.
 model.fit(
     x_train_scaled,
     y_train_scaled,
     epochs=300,
     batch_size=30,
     validation_split=0.2,
 )

In [None]:
# Predict on the standardized test features, then undo the target scaling so
# the predictions are comparable with the unscaled y_test values.
y_pred_scaled = model.predict(x_test_scaled)
y_pred = y_scaler.inverse_transform(y_pred_scaled)

# The metrics receive a flattened 1-D copy of the predictions.
y_pred_flat = y_pred.flatten()
mse = mean_squared_error(y_test, y_pred_flat)
mae = mean_absolute_error(y_test, y_pred_flat)
r2 = r2_score(y_test, y_pred_flat)
rmse = np.sqrt(mse)  # root of the MSE

# Report every metric rounded to two decimals.
report_lines = (
    f'MSE  : {mse:.2f}',
    f'MAE(평균적으로 몇mm의 오차가 발생하는지)  : {mae:.2f}',
    f'RMSE(평균적인 예측 오차 mm) : {rmse:.2f}',
    f'R²   : {r2:.2f}',
)
for report_line in report_lines:
    print(report_line)


In [None]:
import matplotlib.pyplot as plt
import pandas as pd

# Put both series on the same 0..n-1 positional index so they share one
# x-axis. Despite the "_sorted" suffix, no sorting is performed here.
y_test_sorted = y_test.reset_index(drop=True)
y_pred_sorted = pd.Series(y_pred.flatten())  # a fresh Series already has a 0..n-1 index

# Overlay actual and predicted values, one point per test sample.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y_test_sorted, label='Actual Rainfall', color='blue', marker='o')
ax.plot(y_pred_sorted, label='Predicted Rainfall', color='orange', marker='x')
ax.set_title(' Actual / Predicted Rainfall')
ax.set_xlabel('Test Sample Index')
ax.set_ylabel('Rainfall (mm)')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

# 조건부확률을 이용한 강수량 예측
비가 온다고 가정한 후, 입력한 월 평균 기온 조건에서 강수량이 기준 강수량 이상일 조건부 확률을 계산하여 강수량을 예측함

In [None]:
import pandas as pd

# Reload the raw data so this cell can be run on its own.
df = pd.read_csv('/content/data_jeju.csv')

# Fix: the cleaning cell earlier in this notebook shows that the raw file uses
# Korean headers, so dropping 'date'/'city' and then reading 'temperature' /
# 'rainfall' could not find those columns. Apply the same drop and the same
# positional rename as that cleaning cell.
# NOTE(review): the rename assumes the CSV column order matches this list —
# confirm against the file header.
df = df.drop(columns=['기준 연월', '지역 구분', '최심신적설(cm)', '평균운량(1/10)'])
df.columns = ['temperature', 'rainfall', 'moisture', 'sealevelpressure', 'dewpoint', 'daylight(hr)', 'windspeed']

# Condition 1: keep only the rows whose monthly mean temperature lies within
# +/- delta of the requested value (both bounds inclusive).
# float() instead of int() so decimal entries such as 21.5 are accepted.
target_temp = float(input("월 평균 기온(℃) : "))
delta = 0.5
filtered_df = df[df['temperature'].between(target_temp - delta, target_temp + delta)]

# Condition 2: within those rows, flag the ones whose rainfall reaches the
# requested threshold.
rain_threshold = float(input("기준 강수량(mm) : "))
st_rain = filtered_df['rainfall'] >= rain_threshold
print(st_rain)

count_rain = st_rain.sum()   # rows near the target temperature that also reach the threshold
tem_count = len(filtered_df)  # all rows near the target temperature

print(count_rain)
print(tem_count)


# Conditional probability P(rainfall >= threshold | temperature near target).
# Guard against an empty filter result to avoid dividing by zero.
if tem_count > 0:
    conditional = count_rain / tem_count
    # ':g' prints whole-number inputs without a trailing ".0".
    print(f"월 평균 기온이 약 {target_temp:g}℃일 때, 강수량이 {rain_threshold:g}mm 이상일 확률: {conditional:.2%}")
else:
    print("해당 조건을 만족하는 데이터가 없습니다.")