In [2]:
import logging
import os

import matplotlib.pyplot as plt
import pandas as pd
import tweepy
import yfinance as yf
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# 1. X (Twitter) API setup (the example assumes historical data)
# You need your own API keys in practice: https://developer.twitter.com/
# Credentials are read from environment variables so that real keys never have
# to be written into this file; the placeholder strings remain as fallbacks
# when a variable is not set.
consumer_key = os.environ.get("X_CONSUMER_KEY", "your_consumer_key")
consumer_secret = os.environ.get("X_CONSUMER_SECRET", "your_consumer_secret")
access_token = os.environ.get("X_ACCESS_TOKEN", "your_access_token")
access_token_secret = os.environ.get("X_ACCESS_TOKEN_SECRET", "your_access_token_secret")

# Build the OAuth handler from the consumer pair, attach the user access
# token, and create the API client used by the tweet-collection step.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)

# 2. Collect historical price data
# yfinance treats `end` as exclusive, so 2010-01-01 is used to keep the last
# trading day of 2009 in the sample.
sp500 = yf.download('SPY', start='2007-01-01', end='2010-01-01')

# Fail early with a clear message instead of an obscure error further down
# when the download returned nothing.
if sp500.empty:
    raise RuntimeError("No price data was downloaded for SPY; check the network connection.")

# Some yfinance releases return (field, ticker) MultiIndex columns even for a
# single ticker; flatten them so that sp500['Close'] is a plain Series.
if isinstance(sp500.columns, pd.MultiIndex):
    sp500.columns = sp500.columns.get_level_values(0)

# Day-over-day percentage change of the closing price.
sp500['Daily_Return'] = sp500['Close'].pct_change()
# 20-row rolling standard deviation of the closing price.
sp500['Volatility'] = sp500['Close'].rolling(window=20).std()
# A row is flagged when the daily return is below -5% AND the rolling
# deviation is above its mean over the whole downloaded period.
sp500['Crisis_Signal'] = (sp500['Daily_Return'] < -0.05) & (sp500['Volatility'] > sp500['Volatility'].mean())

# 3. Collect X data (assumption: count tweets matching the "SPY crash" keyword)
# Tweets from 2007-2009 are limited in practice, so recent data may be used
# as an example instead.
def get_tweet_sentiment(date, keyword="SPY crash", max_tweets=100):
    """Return a normalized count of tweets matching *keyword* on *date*.

    Despite its name, this function performs no sentiment analysis: it counts
    the tweets yielded by the search (capped at ``max_tweets``) for the
    one-day window starting at ``date`` and divides that count by
    ``max_tweets``.

    Args:
        date: Object supporting ``strftime`` and addition of a
            ``pd.Timedelta`` (e.g. a ``pd.Timestamp``); start of the window.
        keyword: Search query string.
        max_tweets: Maximum number of tweets to fetch; also the
            normalization divisor. Must be positive.

    Returns:
        ``tweet_count / max_tweets`` (a float between 0 and 1), or ``0`` if
        the search raised an exception.

    Raises:
        ValueError: If ``max_tweets`` is not positive.
    """
    if max_tweets <= 0:
        raise ValueError("max_tweets must be a positive integer")

    day_start = date.strftime('%Y-%m-%d')
    day_end = (date + pd.Timedelta(days=1)).strftime('%Y-%m-%d')

    try:
        # NOTE(review): recent Tweepy versions may not list `since` among the
        # accepted parameters of API.search_tweets, and the standard search
        # endpoint reportedly only covers recent tweets -- confirm against the
        # installed Tweepy version.
        tweets = tweepy.Cursor(api.search_tweets, q=keyword, lang="en",
                               since=day_start, until=day_end).items(max_tweets)
        # Count lazily; the tweet contents themselves are not needed.
        tweet_count = sum(1 for _ in tweets)
    except Exception as exc:
        # Best effort: a failed search counts as "no tweets". Exception (not a
        # bare except) lets KeyboardInterrupt/SystemExit propagate, and the
        # warning keeps the failure visible instead of silently hiding it.
        # The Tweepy-specific exception class differs between major versions,
        # so the broad class is caught on purpose.
        logging.getLogger(__name__).warning(
            "Tweet search failed for %s (%r): %s", day_start, keyword, exc
        )
        return 0

    return tweet_count / max_tweets  # simple normalization into the 0..1 range

# Attach the tweet feature to the price data. As a sample run, only the first
# 10 dates are queried through the API; every remaining row is filled with 0.
queried_scores = [get_tweet_sentiment(day) for day in sp500.index[:10]]
zero_fill = [0] * (len(sp500) - 10)
sp500['Tweet_Sentiment'] = queried_scores + zero_fill

# 4. Data cleaning
# Keep only the model columns and drop rows with missing values (the first
# rows have no return / rolling deviation yet), then split features and target.
feature_columns = ['Daily_Return', 'Volatility', 'Tweet_Sentiment']
target_column = 'Crisis_Signal'

data = sp500[feature_columns + [target_column]].dropna()
X = data[feature_columns]
y = data[target_column]

# 5. Model training
# shuffle=False keeps the original row order, so the last 20% of the rows
# form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=False,
)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Report the mean accuracy on the held-out rows.
test_accuracy = model.score(X_test, y_test)
print(f"모델 정확도: {test_accuracy:.2f}")

# 6. Prediction
# Predict over the full cleaned data set (training rows included) and turn
# each prediction into a trade label: flagged rows become 'Sell', others 'Buy'.
predictions = model.predict(X)
data['Prediction'] = predictions
data['Trade_Signal'] = data['Prediction'].map(
    lambda flagged: 'Sell' if flagged else 'Buy'
)

# 7. Visualization
plt.figure(figsize=(14, 7))
closing_prices = sp500.loc[data.index, 'Close']
plt.plot(data.index, closing_prices, label='S&P 500 Close', color='blue')

# Mark the rows the model flagged as a crisis
flagged_rows = data.loc[data['Prediction'] == True]
plt.scatter(
    flagged_rows.index,
    sp500.loc[flagged_rows.index, 'Close'],
    color='red',
    label='Crisis Signal',
    marker='o',
)

# Mark the trade signals: an arrow glyph and colour per signal label
arrow_styles = {
    'Buy': ('↑', 'green'),
    'Sell': ('↓', 'red'),
}
for day, signal in data['Trade_Signal'].items():
    arrow = arrow_styles.get(signal)
    if arrow is None:
        continue
    glyph, tint = arrow
    plt.annotate(glyph, xy=(day, sp500.loc[day, 'Close']), color=tint, fontsize=12, ha='center')

# Labels, legend and grid, then render the figure
plt.title('S&P 500 Price with Crisis and Trade Signals (2007-2009, with Tweet Sentiment)')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.show()

# Print the results: the last rows of the features, prediction and trade label
report_columns = ['Daily_Return', 'Volatility', 'Tweet_Sentiment', 'Prediction', 'Trade_Signal']
latest_rows = data[report_columns].tail()
print(latest_rows)

ModuleNotFoundError: No module named 'tweepy'