In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Load datasets
# The data directory defaults to the original author's local folder. Set the
# AAI501_DATA_DIR environment variable to point at the CSV files on another
# machine instead of editing this cell.
import_base_path = os.environ.get(
    "AAI501_DATA_DIR",
    "/Users/jackkim4/Documents/Compliace:Talent/MSAAI/AAI501",
)
model_1_path = f"{import_base_path}/model1_data_AAPL.csv"
model_2_path = f"{import_base_path}/model2_data_all_companies.csv"
model_1_data = pd.read_csv(model_1_path)
model_2_data = pd.read_csv(model_2_path)

# Convert 'date' to datetime; pd.to_datetime raises on values it cannot parse,
# so malformed dates surface here.
model_1_data['date'] = pd.to_datetime(model_1_data['date'])
model_2_data['date'] = pd.to_datetime(model_2_data['date'])

# Summary statistics for the price and volume columns.
# sep="\n" puts each table on its own line. Embedding "\n" in the heading and
# relying on print's default " " separator would insert a stray space after the
# newline and push the table's header row one column out of alignment.
summary_columns = ['open', 'close', 'volume']
print("AAPL Summary:", model_1_data[summary_columns].describe(), sep="\n")
print("\nIT Companies Summary:", model_2_data[summary_columns].describe(), sep="\n")

In [None]:
# Distribution plots: overlaid 50-bin histograms (with KDE curves) comparing
# the AAPL dataset against the multi-company dataset, one figure per column.
# Each spec is (column, x-axis label, figure title).
histogram_specs = [
    ('close', "Closing Price", "Distribution of Closing Prices"),
    ('volume', "Trading Volume", "Distribution of Trading Volume"),
]

for column, axis_label, figure_title in histogram_specs:
    fig, ax = plt.subplots(figsize=(12, 5))
    sns.histplot(model_1_data[column], bins=50, label='AAPL',
                 kde=True, color='blue', ax=ax)
    # The second histogram is semi-transparent so the first stays visible beneath it.
    sns.histplot(model_2_data[column], bins=50, label='IT Companies',
                 kde=True, color='red', alpha=0.6, ax=ax)
    ax.set_xlabel(axis_label)
    ax.set_ylabel("Frequency")
    ax.set_title(figure_title)
    ax.legend()
    plt.show()

In [None]:
# Time-series plots: AAPL drawn in blue, plus one line per symbol from the
# multi-company dataset, one figure per metric.
# Each spec is (column, y-axis label, figure title).
time_series_specs = [
    ('close', "Closing Price", "Stock Price Trends Over Time"),
    ('volume', "Trading Volume", "Trading Volume Trends Over Time"),
]

for metric, y_label, plot_title in time_series_specs:
    fig, ax = plt.subplots(figsize=(12, 6))
    sns.lineplot(data=model_1_data, x='date', y=metric,
                 label='AAPL', color='blue', ax=ax)
    sns.lineplot(data=model_2_data, x='date', y=metric,
                 hue='symbol', alpha=0.8, ax=ax)
    ax.set_xlabel("Date")
    ax.set_ylabel(y_label)
    ax.set_title(plot_title)
    ax.legend()
    # plt.xticks acts on the current axes, which is `ax` here.
    plt.xticks(rotation=45)
    plt.show()

In [None]:
# Correlation matrix and heatmap
# Reshape to wide form: one row per date, one column per symbol, cells hold
# the closing price.
# NOTE(review): pivot needs each (date, symbol) pair to be unique -- confirm
# the CSV has no duplicate rows.
pivot_data = model_2_data.pivot(index='date', columns='symbol', values='close')

# Pairwise correlation between the per-symbol closing-price columns.
correlation_matrix = pivot_data.corr()

fig, ax = plt.subplots(figsize=(10, 6))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", ax=ax)
ax.set_title("Correlation Matrix of Stock Closing Prices")
plt.show()