In [1]:
import os
import pandas as pd

# Set paths
home_dir = os.path.expanduser('~')
stock_path = os.path.join(home_dir, "CAPSTONE-stockreturn/data/stock/goog_stock_data.csv")
sentiment_path = os.path.join(home_dir, "CAPSTONE-stockreturn/data/sentiment/Google_merged_file.csv")

# Read the CSV files
stock_df = pd.read_csv(stock_path)
sentiment_df = pd.read_csv(sentiment_path)

# Convert date columns to datetime
stock_df['Date'] = pd.to_datetime(stock_df['Date']).dt.tz_localize('UTC')
sentiment_df['Date'] = pd.to_datetime(sentiment_df['Date']).dt.tz_localize('UTC')

# Keep only Date and Sentiment_Score columns from sentiment data
sentiment_df = sentiment_df[['Date', 'Sentiment_Score']]

# Merge dataframes on Date
merged_df = pd.merge(stock_df, sentiment_df, on='Date', how='outer')

# Drop any rows with NaN values
merged_df = merged_df.dropna()

# Display the first few rows of the merged dataframe
print(merged_df.head())

# Save the merged data
output_path = os.path.join(home_dir, "CAPSTONE-stockreturn/data/stock/goog_stock_wsenti.csv")
merged_df.to_csv(output_path, index=False)
print(f"Merged data saved to: {output_path}")

                         Date  Adj Close      Close       High        Low  \
168 2020-06-01 00:00:00+00:00  71.334686  71.591003  71.898003  70.900002   
169 2020-06-02 00:00:00+00:00  71.703362  71.960999  71.980499  70.941498   
170 2020-06-03 00:00:00+00:00  71.561874  71.819000  72.327599  71.488853   
171 2020-06-04 00:00:00+00:00  70.356201  70.609001  71.947998  70.236504   
172 2020-06-05 00:00:00+00:00  71.662010  71.919502  72.252502  70.300003   

          Open      Volume Ticker Industry  Daily Return  Sentiment_Score  
168  70.919502  24342000.0   GOOG     Tech      0.002030         0.501882  
169  71.527496  25562000.0   GOOG     Tech      0.005168         0.513355  
170  71.915001  25124000.0   GOOG     Tech     -0.001973         0.460719  
171  71.519997  29686000.0   GOOG     Tech     -0.016848         0.602549  
172  70.658501  34698000.0   GOOG     Tech      0.018560         0.603713  
Merged data saved to: /home/rdai/CAPSTONE-stockreturn/data/stock/goog_stock_wsent