In [5]:
# One-time Colab environment setup: uncomment and run on a fresh runtime.
# !pip install transformers
# !pip install -q torch torchvision torchaudio
# !pip install supabase
# !pip install python-dotenv  # PyPI package that provides `from dotenv import load_dotenv`
# !pip install finnhub-python

Collecting supabase
  Downloading supabase-2.15.3-py3-none-any.whl.metadata (11 kB)
Collecting gotrue<3.0.0,>=2.11.0 (from supabase)
  Downloading gotrue-2.12.0-py3-none-any.whl.metadata (6.1 kB)
Collecting postgrest<1.1,>0.19 (from supabase)
  Downloading postgrest-1.0.2-py3-none-any.whl.metadata (3.5 kB)
Collecting realtime<2.5.0,>=2.4.0 (from supabase)
  Downloading realtime-2.4.3-py3-none-any.whl.metadata (6.7 kB)
Collecting storage3<0.12,>=0.10 (from supabase)
  Downloading storage3-0.11.3-py3-none-any.whl.metadata (1.8 kB)
Collecting supafunc<0.10,>=0.9 (from supabase)
  Downloading supafunc-0.9.4-py3-none-any.whl.metadata (1.2 kB)
Collecting pytest-mock<4.0.0,>=3.14.0 (from gotrue<3.0.0,>=2.11.0->supabase)
  Downloading pytest_mock-3.14.1-py3-none-any.whl.metadata (3.9 kB)
Collecting deprecation<3.0.0,>=2.1.0 (from postgrest<1.1,>0.19->supabase)
  Downloading deprecation-2.1.0-py2.py3-none-any.whl.metadata (4.6 kB)
Collecting aiohttp<4.0.0,>=3.11.18 (from realtime<2.5.0,>=2.4.0-

In [2]:
from google.colab import files,drive
# Mount Google Drive at /gdrive; the project paths used in this notebook all live under /gdrive/MyDrive.
drive.mount('/gdrive')

Mounted at /gdrive


In [3]:
import sys
# Add the project directory on Drive to the import path so the project-local
# modules imported next can be resolved (presumably they live in this folder — confirm).
sys.path.append('/gdrive/MyDrive/finn-project/be-ai-model')

from input_processing import get_csv_data, merge_data
from sentiment_model import analyze_sentiment_with_progress, add_integer_column, sums_sentiment_score_for_7_days, update_sentiment_score_in_db
from lstm_model import get_scale_data, get_scale_data_with_fit, create_sequences_for_train, compile_model, train_model, predict_prices, create_sequences_for_prod
from output_processing import compare_prices_with_graph
from market_capitalization import get_capitalization

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/252 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/758 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Device set to use cuda:0


model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

In [8]:
import os
import numpy as np
from datetime import datetime, timedelta
import pandas as pd
from supabase import create_client, Client
import finnhub
from dotenv import load_dotenv

# Load the project's .env file from Google Drive into the process environment.
dotenv_path = '/gdrive/MyDrive/finn-project/.env'
load_dotenv(dotenv_path=dotenv_path)

# Pipeline constants. FETCH_DAYS caps how many price rows are read from the DB;
# the other two are not referenced by the cells visible in this notebook.
SEQUENCE_LENGTH = 7
SENTIMENT_WINDOW_DAYS = 7
FETCH_DAYS = 13

# Credentials; each lookup yields None when the variable is not set.
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_API_KEY")
finnhub_api_key = os.getenv("FINNHUB_API_KEY")

# Shared API clients used by the rest of the notebook.
supabase: Client = create_client(supabase_url, supabase_key)
finnhub_client = finnhub.Client(api_key=finnhub_api_key)

In [9]:
# Only one model (for TSLA) exists so far, so only Tesla's row is looked up;
# its fields drive the price/news queries and the saved prediction.
stock_response = (
    supabase.table('stocks')
    .select('id,stock_code,company_name')
    .eq('stock_code', 'TSLA')
    .single()
    .execute()
)
stock_id, stock_code, company_name = (
    stock_response.data[field] for field in ('id', 'stock_code', 'company_name')
)

In [10]:
def get_price_data_from_db(stock_id):
    """Fetch the most recent price rows of one stock from the `stock_prices` table.

    Two queries are issued through the module-level `supabase` client: the first
    finds the newest `price_date` for the stock, the second reads up to
    `FETCH_DAYS` rows on or before that date, newest first.

    Args:
        stock_id: Value matched against the `stock_id` column.

    Returns:
        The `data` payload of the second query, ordered by `price_date`
        descending, or an empty list when the stock has no price rows at all.
    """
    # No `.single()` here: it expects exactly one row, so an empty table could
    # never reach the "no data" branch below. A plain `.limit(1)` yields a
    # (possibly empty) list that can be checked safely.
    latest_response = supabase.table('stock_prices').select('price_date') \
        .eq('stock_id', stock_id) \
        .order('price_date', desc=True) \
        .limit(1) \
        .execute()

    latest_rows = latest_response.data
    if not latest_rows:
        print(f"🚨 '{stock_id}'에 대한 최근의 주가 데이터가 DB에 없습니다.")
        # Stop here instead of indexing into missing data.
        return []

    prices_latest_date = latest_rows[0]['price_date']

    prices_response = supabase.table('stock_prices').select('*') \
        .eq('stock_id', stock_id) \
        .lte('price_date', prices_latest_date) \
        .order('price_date', desc=True) \
        .limit(FETCH_DAYS) \
        .execute()

    return prices_response.data

In [11]:
def get_news_data_from_db(start_date, end_date, stock_id):
    """Read the news rows of one stock whose `created_date` lies in a date range.

    Args:
        start_date: Lower bound (inclusive); must support `strftime`.
        end_date: Upper bound (inclusive); must support `strftime`.
        stock_id: Value matched against the `stock_id` column.

    Returns:
        The `data` payload of the query. A warning is printed when it is empty,
        and the empty payload is still returned.
    """
    date_format = '%Y-%m-%d'
    news_query = (
        supabase.table('news')
        .select('*')
        .eq('stock_id', stock_id)
        .gte('created_date', start_date.strftime(date_format))
        .lte('created_date', end_date.strftime(date_format))
    )
    news_rows = news_query.execute().data

    if not news_rows:
        print(f"🚨 '{stock_id}'에 대한 최근의 뉴스 데이터가 DB에 없습니다.")

    return news_rows

In [12]:
from tensorflow.keras.models import load_model
import pickle

def get_existing_model(load_path='/gdrive/MyDrive/finn-project/models/tsla_finn_model.keras'):
    """Load a saved Keras model from disk.

    Args:
        load_path: Location of the `.keras` file. Defaults to the TSLA model
            stored on the mounted Google Drive, so existing zero-argument calls
            behave as before.

    Returns:
        Whatever `load_model` returns for that file.
    """
    return load_model(load_path)

def get_existing_scaler(load_path='/gdrive/MyDrive/finn-project/models/tsla_finn_scaler.pkl'):
    """Load the pickled scaler object from disk.

    Args:
        load_path: Location of the pickle file. Defaults to the TSLA scaler
            stored on the mounted Google Drive, so existing zero-argument calls
            behave as before.

    Returns:
        The object stored in the pickle file.
    """
    # SECURITY: pickle.load executes arbitrary code embedded in the file; only
    # point this at files produced by this project's own training run.
    with open(load_path, 'rb') as scaler_file:
        return pickle.load(scaler_file)

In [13]:
def get_change_rate(prev_price, today_price):
    """Return the percentage change from `prev_price` to `today_price`, rounded to 2 decimals.

    Args:
        prev_price: Reference price (the denominator). A zero value is not
            guarded against here.
        today_price: Price being compared with the reference.

    Returns:
        `(today_price - prev_price) / prev_price * 100`, rounded to two decimals.
    """
    change_rate = ((today_price - prev_price) / prev_price) * 100
    # np.round accepts plain Python numbers as well as NumPy scalars and
    # Series, whereas the `.round` method does not exist on built-in floats.
    return np.round(change_rate, 2)

def get_closely_prev_close_price(df):
    """Return the `close_price` of the newest row that actually has one.

    Rows whose `close_price` is NaN are ignored; among the remaining rows the
    one with the greatest index value is used.

    Args:
        df: DataFrame with a `close_price` column and a sortable index.

    Returns:
        The `close_price` value of the selected row.

    Raises:
        ValueError: If no row has a non-NaN `close_price`.
    """
    # Keep only rows where close_price is present.
    valid_rows = df.dropna(subset=['close_price'])
    if valid_rows.empty:
        raise ValueError("no row with a non-null 'close_price' is available")

    # Sort the index in descending order so the newest row comes first. The
    # index label itself is not needed, so it is no longer formatted as a
    # date, which also removes the requirement for a datetime index.
    latest_valid_row = valid_rows.sort_index(ascending=False).iloc[0]
    return latest_valid_row['close_price']



In [14]:
# Build one `predictions` row and upsert it; market capitalization is supplied by the caller.
def save_predictions_in_db(stock_id, stock_code, company_name, prediction_price, prediction_date, change_rate, capitalization):
    """Upsert a single prediction row into the `predictions` table.

    All arguments are written verbatim into the column of the same name.
    Nothing is returned and nothing is raised: the outcome (success, an error
    reported on the response, or an exception) is only printed.
    """
    prediction_row = {
        "stock_id": stock_id,
        "prediction_date": prediction_date,
        "stock_code": stock_code,
        "company_name": company_name,
        "prediction_price": prediction_price,
        "change_rate": change_rate,
        "capitalization": capitalization,
    }

    try:
        response = supabase.table('predictions').upsert(prediction_row).execute()

        # Some client versions expose failures through an `error` attribute.
        db_error = getattr(response, 'error', None)
        if db_error is None:
            print("✅ DB 업데이트가 성공적으로 완료되었습니다.")
        else:
            print(f"🚨 DB 업데이트 중 에러가 발생했습니다: {db_error}")
    except Exception as e:
        print(f"🚨 DB 업데이트 중 예외 발생: {e}")

In [15]:
# Read the latest price rows, then the news whose dates fall inside the same span.
prices_response = get_price_data_from_db(stock_id)
stock_prices_df = pd.DataFrame(prices_response).rename(
    columns={'price_date': 'date', 'id': 'stock_price_id'}
)

# The oldest and newest price dates bound the news query.
price_dates = pd.to_datetime(stock_prices_df['date'])
start_date, end_date = price_dates.min(), price_dates.max()

news_response = get_news_data_from_db(start_date, end_date, stock_id)
news_df = pd.DataFrame(news_response).rename(
    columns={'created_date': 'date', 'id': 'news_id'}
)

In [16]:
# Load the saved model and the pickled scaler from Drive.
model, scaler = get_existing_model(), get_existing_scaler()

In [17]:
# Run sentiment analysis over the fetched news rows.
# NOTE(review): this cell only reassigns news_df; the write of sentiment scores to the DB
# is done separately by update_sentiment_score_in_db.
news_df = analyze_sentiment_with_progress(news_df)

감성 분석 진행중:   0%|          | 0/15 [00:00<?, ?batch/s]

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


In [18]:
# Combine the news rows and the price rows into one frame (logic lives in input_processing.merge_data).
merged_df = merge_data(news_df, stock_prices_df)

In [19]:
# Initialise the sentiment_influence feature to 0.0 for every row.
merged_df['sentiment_influence'] = 0.0
# NOTE(review): presumably adds the integer sentiment_score column — confirm in sentiment_model.
merged_df = add_integer_column(merged_df)

# The return value is not captured, so this call presumably updates merged_df in place — TODO confirm.
sums_sentiment_score_for_7_days(merged_df)

  0%|          | 0/464 [00:00<?, ?it/s]

In [20]:
# Push the sentiment scores held in merged_df to the database through the Supabase client.
update_sentiment_score_in_db(supabase, merged_df)

🔄 464개의 감성 점수를 DB에 업데이트합니다...
✅ DB 업데이트가 성공적으로 완료되었습니다.


In [21]:
# Display the merged frame as a visual sanity check before building model inputs.
merged_df

Unnamed: 0_level_0,news_id,title,sentiment,confidence,stock_price_id,change_rate,close_price,high_price,low_price,open_price,stock_id,volume,adj_close_price,sentiment_influence,sentiment_score
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2025-05-24,d4556031-9eec-458c-94f3-cdf430319191,‘Tesla tax’ could be no more in United Kingdom...,neutral,0.9164,,,,,,,,,,-0.270030,0
2025-05-27,5b01804f-3d89-40cd-9c6d-a0b3eab30661,Tesla Is Getting Absolutely Creamed In Europe ...,neutral,0.8285,2cd8f560-a0c0-4871-bb0a-c0d7e5942cc9,0.0,362.89,363.79,347.32,347.350,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,120146414.0,362.89,-10.215550,0
2025-05-28,3ec28154-ec1f-4e58-ac91-54f03f177800,Fortune Tech: A bold plan - Fortune,neutral,0.7301,580811d0-16d8-4179-bc63-4c5dd506eb34,0.0,356.90,365.00,355.91,364.840,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,91404309.0,356.90,-11.776510,0
2025-05-23,e53fb621-d377-4835-b0b4-bfc322ca6df4,An Ex-Tesla Engineer Is Turning EVs Into Affor...,neutral,0.8207,1fe120e2-5648-4fb7-9908-02ac4927091c,0.0,339.34,343.18,333.21,337.920,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,84654818.0,339.34,-0.693800,0
2025-05-23,edc7e19a-1597-4b71-9da1-d13a06a3bb14,"Tesla Full Self-Driving veers off road, flips ...",neutral,0.7675,1fe120e2-5648-4fb7-9908-02ac4927091c,0.0,339.34,343.18,333.21,337.920,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,84654818.0,339.34,-0.693800,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-11,1e37ef13-df4d-4a2f-812c-7e7bf772b121,Elon Musk's robotaxi launch in Texas tests his...,neutral,0.7875,87067fe9-1e51-403d-bd7a-bef25a3881f2,0.0,326.43,335.50,322.50,334.395,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,122611360.0,326.43,-21.767525,0
2025-06-11,0e533e3a-2e7a-4f93-b11e-281fde368e8b,Tesla Stock Ekes Out a 4th Straight Day of Gai...,positive,0.7113,87067fe9-1e51-403d-bd7a-bef25a3881f2,0.0,326.43,335.50,322.50,334.395,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,122611360.0,326.43,-21.767525,1
2025-06-11,9ee7b58f-3698-447c-90c5-5b3fe009c3bb,Elon Musk Sets Tentative Tesla Robotaxi Launch...,neutral,0.9298,87067fe9-1e51-403d-bd7a-bef25a3881f2,0.0,326.43,335.50,322.50,334.395,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,122611360.0,326.43,-21.767525,0
2025-06-11,b5da94d3-b10e-4868-a0c6-c9281bd3c0ea,The Latest Tempest for Tesla's Stock Looks to ...,neutral,0.5397,87067fe9-1e51-403d-bd7a-bef25a3881f2,0.0,326.43,335.50,322.50,334.395,c695fc5b-eb68-4fc9-ab14-a16b24af6b37,122611360.0,326.43,-21.767525,0


In [22]:
# Column layout passed to the scaler: six input features followed by the target.
features = ['sentiment_influence', 'open', 'high', 'low', 'adjClose', 'volume']
target   = 'close'
all_cols = features + [target]

# Map the DB column names to the short names listed above, keep only those
# columns (in that order) and drop every row that has a missing value.
renamed_df = merged_df.rename(columns={
    'open_price': 'open',
    'high_price': 'high',
    'low_price': 'low',
    'close_price': 'close',
    'adj_close_price': 'adjClose',
})
dropped_merged_df = renamed_df[all_cols].dropna()

# Scale with the loaded scaler, then build the model's input sequences.
scaled = get_scale_data(scaler, dropped_merged_df)
X = create_sequences_for_prod(scaled)

In [23]:
# Run the loaded model on the prepared sequences; the result is still in the scaler's units.
y_pred_scaled = predict_prices(model, X)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 412ms/step


In [24]:
# The scaler operates on the full 7-column layout, so the scaled predictions
# are placed into a zero-filled array of that width before being inverted.
target = 'close'
all_cols = ['sentiment_influence', 'open', 'high', 'low', 'adjClose', 'volume', target]
features = [col for col in all_cols if col != target]  # every column except 'close'
target_col_index = all_cols.index(target)
num_features = len(features)

# One row per prediction; only the 'close' slot (index 6) carries a value.
dummy_array = np.zeros((len(y_pred_scaled), len(all_cols)))
dummy_array[:, target_col_index] = np.ravel(y_pred_scaled)

# Invert the scaling on the whole array and keep just the 'close' column.
y_pred_actual = scaler.inverse_transform(dummy_array)[:, target_col_index]

In [25]:
# Take the last prediction as the forecast close, rounded to 4 decimals.
next_day_predicted_close = np.round(y_pred_actual[-1], 4)

# Percentage change relative to the newest close price available in merged_df.
closely_prev_price = get_closely_prev_close_price(merged_df)
change_rate = get_change_rate(closely_prev_price, next_day_predicted_close)
print(f"예측된 실제 종가: ${next_day_predicted_close:.4f}")

# Market capitalization is fetched through the Finnhub client for the same ticker.
capitalization = get_capitalization(finnhub_client, stock_code)
print(capitalization)

예측된 실제 종가: $325.7498
1046365


In [26]:
# NOTE(review): the row is stamped with today's date although the value is named a
# next-day prediction — confirm which date `prediction_date` is meant to hold.
today_date = f"{datetime.now():%Y-%m-%d}"
save_predictions_in_db(
    stock_id=stock_id,
    stock_code=stock_code,
    company_name=company_name,
    prediction_price=next_day_predicted_close,
    prediction_date=today_date,
    change_rate=change_rate,
    capitalization=capitalization,
)

✅ DB 업데이트가 성공적으로 완료되었습니다.
