In [None]:
from pathlib import Path

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import plotly.express as px
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Directory of the Kaggle input dataset that holds all four per-company CSVs.
# Kept in one place so the notebook can be pointed at another location easily.
DATA_DIR = '/kaggle/input/stock-prices-for'

# One CSV per ticker.
apple_path = f'{DATA_DIR}/AAPL_data.csv'
amazon_path = f'{DATA_DIR}/AMZN_data.csv'
google_path = f'{DATA_DIR}/GOOG_data.csv'
microsoft_path = f'{DATA_DIR}/MSFT_data.csv'

# `microsoft` is the name the other cells of this notebook use; keep it as an
# alias of the consistently named `microsoft_path`.
microsoft = microsoft_path

In [None]:
# CSV locations in a fixed order (Apple, Amazon, Google, Microsoft); the
# describe() cells below index into this list by position.
paths = [
    apple_path,
    amazon_path,
    google_path,
    microsoft,
]

# Descriptive statistics for Apple, Amazon, Google and Microsoft, respectively

In [None]:
# Summary statistics (DataFrame.describe) for paths[0], the Apple CSV.
pd.read_csv(paths[0]).describe()

In [None]:
# Summary statistics (DataFrame.describe) for paths[1], the Amazon CSV.
pd.read_csv(paths[1]).describe()

In [None]:
# Summary statistics (DataFrame.describe) for paths[2], the Google CSV.
pd.read_csv(paths[2]).describe()

In [None]:
# Summary statistics (DataFrame.describe) for paths[3], the Microsoft CSV.
pd.read_csv(paths[3]).describe()

# Checking for NaN values

In [None]:
# Report, for every CSV, how many missing values each column contains.
for csv_path in paths:
    # File name without its extension, e.g. 'AAPL_data' for '.../AAPL_data.csv'.
    # Path.stem avoids assuming a '/' separator and a 4-character extension.
    print(Path(csv_path).stem)
    # Per-column count of NaN entries.
    print(pd.read_csv(csv_path).isna().sum())
    # Blank line between companies.
    print()

# EDA pipeline for each company

In [None]:
def eda_company(path):
    """Run a quick visual EDA on one company's price CSV.

    Reads the file at ``path`` and draws two figures:

    * a seaborn pair plot over the five numeric columns;
    * a 2 x 5 grid holding, for each of those columns, a box plot (top row)
      and a histogram with a KDE curve (bottom row).

    Parameters
    ----------
    path : location of the CSV, handed straight to ``pd.read_csv``. The file
        must provide ``open``, ``high``, ``low``, ``close`` and ``volume``
        columns.

    Returns
    -------
    None. The figures are displayed with ``plt.show()``.
    """
    numerical_cols = ['open', 'high', 'low', 'close', 'volume']
    prices = pd.read_csv(path)

    # pairplot builds its own figure, separate from the grid created below.
    sns.pairplot(prices, vars=numerical_cols)

    fig, axes = plt.subplots(nrows=2, ncols=len(numerical_cols), figsize=(15, 5))
    box_row, hist_row = axes
    for column, box_ax, hist_ax in zip(numerical_cols, box_row, hist_row):
        sns.boxplot(prices, x=column, ax=box_ax)
        sns.histplot(prices, x=column, ax=hist_ax, kde=True)

    plt.tight_layout()
    plt.show()

# Data for Apple

In [None]:
# Pair plot plus box-plot/histogram grid for the Apple CSV.
eda_company(apple_path)

# Data for Amazon

In [None]:
# Pair plot plus box-plot/histogram grid for the Amazon CSV.
eda_company(amazon_path)

# Data for Google

In [None]:
# Pair plot plus box-plot/histogram grid for the Google CSV.
eda_company(google_path)

# Data for Microsoft

In [None]:
# Pair plot plus box-plot/histogram grid for the Microsoft CSV.
# `microsoft` holds the MSFT file path (it lacks the `_path` suffix that the
# other three path variables carry).
eda_company(microsoft)

In [None]:
# Load each company's CSV into its own DataFrame.
aapl = pd.read_csv(apple_path)
amzn = pd.read_csv(amazon_path)
goog = pd.read_csv(google_path)
msft = pd.read_csv(microsoft)

# Column labels applied positionally to every frame (each file must therefore
# have exactly len(cols) columns, in this order).
cols = ['date', 'open', 'high', 'low', 'close', 'volume','Name']

# Stack the four frames row-wise with pd.concat instead of np.vstack on
# `.values`: going through a single NumPy array forces one common dtype, which
# for frames mixing text and numeric columns is `object` for every column.
# concat keeps each column's parsed dtype, and ignore_index=True gives the
# combined frame a fresh 0..n-1 index.
dt = pd.concat(
    [frame.set_axis(cols, axis=1) for frame in (aapl, amzn, goog, msft)],
    ignore_index=True,
)

In [None]:
def plots(df, x):
    """Show a three-panel summary of column ``x`` broken down by ``Name``.

    Panels, left to right:

    1. histogram with KDE of ``x``, coloured by ``Name``;
    2. bar chart of the mean of ``x`` per ``Name``, each bar labelled with
       its value;
    3. pie chart of the share of rows belonging to each ``Name``.

    Parameters
    ----------
    df : DataFrame containing a ``Name`` column and the column ``x``.
    x : name of the column to summarise.

    Returns
    -------
    None. The figure is displayed with ``plt.show()``.
    """
    # Aggregate once and reuse the results (both were previously recomputed
    # for every argument that needed them).
    mean_by_name = df.groupby('Name')[x].mean()
    rows_per_name = df['Name'].value_counts()

    fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 10))
    hist_ax, bar_ax, pie_ax = axes

    sns.histplot(df, x=x, hue='Name', kde=True, ax=hist_ax)

    sns.barplot(x=mean_by_name.index, y=mean_by_name, ax=bar_ax)
    # Print each bar's height on top of it.
    for container in bar_ax.containers:
        bar_ax.bar_label(container, size=15, color='black')

    pie_ax.pie(rows_per_name,
               labels=rows_per_name.index,
               autopct='%0.2f%%')

    plt.suptitle("Barplot of {}'s mean values and data distribution grouped by Name".format(x), size=15)

    plt.tight_layout()
    plt.show()

In [None]:
# Numeric columns to summarise; the time-series cell further down reuses this list.
nums = ['open', 'high', 'low', 'close', 'volume']

# One three-panel figure (histogram, mean bar chart, pie chart) per column.
for column in nums:
    plots(dt, column)

In [None]:
def time_series(df, x):
    """Display a Plotly line chart of column ``x`` against ``date``.

    The chart is faceted by the ``Name`` column, so each distinct ``Name``
    gets its own panel.

    Parameters
    ----------
    df : data handed to ``px.line``; it must provide ``date``, ``Name`` and
        the column ``x``.
    x : name of the column plotted on the y-axis.

    Returns
    -------
    None. The figure is rendered via its ``show()`` method.
    """
    px.line(data_frame=df, x='date', y=x, facet_col='Name').show()

# Time Series data for each company

In [None]:
# One faceted line chart per numeric column listed in `nums`.
for column in nums:
    time_series(dt, column)