In [22]:
import pandas as pd
import sys

# Add the '../scripts' directory to the system path to import custom modules
sys.path.append('../scripts')

# Import the merge function from your custom script
from merge import merge_news_and_stock

# Where the two inputs live and where the joined table is written
news_path = "../datas/sentimenta_added_data.csv"
stock_path = "../datas/TSLA_historical_data_cleaned.csv"
merge_path = "../datas/TSLA_merged_data.csv"

# Read both CSVs up front
news_df = pd.read_csv(news_path)
stock_df = pd.read_csv(stock_path)

# News side, in three steps:
#   1. parse 'date' as UTC timestamps; unparseable values become NaT (errors='coerce')
#   2. drop the rows whose 'date' is NaT
#   3. keep only the calendar date so it can be matched against the stock dates
news_df = (
    news_df
    .assign(date=lambda frame: pd.to_datetime(frame['date'], utc=True, errors='coerce'))
    .dropna(subset=['date'])
    .assign(date=lambda frame: frame['date'].dt.date)
)

# Stock side: parse 'Date' and reduce it to a calendar date as well
stock_df = stock_df.assign(Date=pd.to_datetime(stock_df['Date']).dt.date)

# Left join: every remaining news row is kept; stock columns are filled only
# where a stock row exists for the same calendar date
merged_df = news_df.merge(stock_df, how='left', left_on='date', right_on='Date')

# Persist the joined table without the index column
merged_df.to_csv(merge_path, index=False)


In [23]:
import pandas as pd
import sys

# Add the '../scripts' directory to the system path to import custom modules
sys.path.append('../scripts')

# Import the merge function from your custom script
from merge import merge_news_and_stock

# Define file paths for the input data and output merged file
news_path_NVDA = "../datas/sentimenta_added_data.csv"
stock_path_NVDA = "../datas/NVDA_historical_data_cleaned.csv"
merge_path_NVDA = "../datas/NVDA_merged_data.csv"

# Load the news and the NVDA stock CSVs into DataFrames
news_df_NVDA = pd.read_csv(news_path_NVDA)
stock_df_NVDA = pd.read_csv(stock_path_NVDA)

# Parse the 'date' column of THIS cell's news frame as UTC timestamps.
# The source must be news_df_NVDA (not the `news_df` created in the TSLA cell):
# otherwise this cell only runs after that cell and silently relies on index
# alignment with a frame that has already been filtered and converted.
# errors='coerce' turns unparseable values into NaT, which are dropped next.
news_df_NVDA['date'] = pd.to_datetime(news_df_NVDA['date'], utc=True, errors='coerce')

# Remove rows where the date could not be parsed
news_df_NVDA = news_df_NVDA.dropna(subset=['date'])

# Keep only the calendar date (year-month-day) for merging
news_df_NVDA = news_df_NVDA.assign(date=news_df_NVDA['date'].dt.date)

# Parse the stock 'Date' column and keep only the calendar date so both
# sides of the join use the same kind of key
stock_df_NVDA['Date'] = pd.to_datetime(stock_df_NVDA['Date']).dt.date

# Left join on the calendar date: all news rows are kept, stock columns are
# filled where a stock row exists for the same date
merged_df_NVDA = pd.merge(news_df_NVDA, stock_df_NVDA, how='left', left_on='date', right_on='Date')

# Save the merged DataFrame to CSV (without the index column)
merged_df_NVDA.to_csv(merge_path_NVDA, index=False)


In [4]:
import pandas as pd

def merge_news_and_stock(news_path, stock_path, merge_path):
    """Left-join a news CSV onto a stock CSV by calendar date and save the result.

    Parameters
    ----------
    news_path : path of a CSV that has a 'date' column.
    stock_path : path of a CSV that has a 'Date' column.
    merge_path : path the merged CSV is written to (index column omitted).

    The news 'date' values are parsed as UTC timestamps; values that cannot be
    parsed become NaT and those rows are dropped. Both date columns are then
    reduced to calendar dates and used as the join keys. Every remaining news
    row is kept; stock columns are empty where no stock row shares the date.

    Returns None; the only result is the file written to ``merge_path``.
    """
    articles = pd.read_csv(news_path)
    prices = pd.read_csv(stock_path)

    articles = (
        articles
        # UTC-aware parse; invalid entries turn into NaT instead of raising
        .assign(date=lambda frame: pd.to_datetime(frame['date'], utc=True, errors='coerce'))
        # discard the rows whose date could not be parsed
        .dropna(subset=['date'])
        # keep just year-month-day so it is comparable with the stock dates
        .assign(date=lambda frame: frame['date'].dt.date)
    )

    # Same reduction on the stock side
    prices = prices.assign(Date=pd.to_datetime(prices['Date']).dt.date)

    # Join and write in one go
    articles.merge(
        prices, how='left', left_on='date', right_on='Date'
    ).to_csv(merge_path, index=False)

# 🚀 Now the function is called OUTSIDE the function block
# Merge the shared news file with the price history of each remaining ticker.
# For every ticker the input is ../datas/<TICKER>_historical_data_cleaned.csv
# and the output is ../datas/<TICKER>_merged_data.csv; the tickers are
# processed in the order listed.
for ticker in ("MSFT", "META", "GOOG", "AMZN", "AAPL"):
    merge_news_and_stock(
        news_path="../datas/sentimenta_added_data.csv",
        stock_path=f"../datas/{ticker}_historical_data_cleaned.csv",
        merge_path=f"../datas/{ticker}_merged_data.csv",
    )

