In [2]:
import pandas as pd

## Line chart data

In [4]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

# Keep the four tickers used by the line chart and parse Date into datetimes
# so the export cell can filter on `.dt.year`.
df = (
    pd.read_csv(url)
    .loc[lambda raw: raw["Name"].isin(["AAPL", "JPM", "GOOGL", "AMZN"])]
    .assign(Date=lambda kept: pd.to_datetime(kept["Date"]))
)

In [9]:
# Export only the rows dated 2015 or later; the index is not written.
df.loc[df["Date"].dt.year.ge(2015)].to_csv("data/line_plot.csv", index=False)

## Bar chart data

In [15]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

# AAPL only: derive the calendar year of each row, keep 2014 onward, and take
# the per-year maximum of every remaining column (Date, Open, Close, Name).
df = (
    pd.read_csv(url)
    .loc[lambda raw: raw["Name"].eq("AAPL"), ["Date", "Open", "Close", "Name"]]
    .assign(Year=lambda aapl: pd.to_datetime(aapl["Date"]).dt.year)
    .loc[lambda aapl: aapl["Year"] >= 2014]
    .groupby("Year")
    .max()
    .reset_index()
)

In [16]:
# The Date column is removed before export; the index is not written.
df.drop("Date", axis="columns").to_csv("data/bar_chart.csv", index=False)

## Stacked bar chart data

In [17]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

stocks_filter = ["AAPL", "JPM", "GOOGL", "AMZN", "IBM"]

# Total Volume per (Year, Name) from 2012 onward. Volume is divided by 1e9
# before summing, so the totals are expressed in billions.
df = (
    pd.read_csv(url)
    .loc[lambda raw: raw["Name"].isin(stocks_filter)]
    .assign(
        Year=lambda kept: pd.to_datetime(kept["Date"]).dt.year,
        Volume=lambda kept: kept["Volume"] / 1e9,
    )
    .loc[lambda kept: kept["Year"] >= 2012, ["Year", "Volume", "Name"]]
    .groupby(["Year", "Name"])
    .sum()
    .reset_index()
)

In [20]:
# Write the current `df` to the stacked bar chart CSV; index=False omits the index.
df.to_csv("data/stacked_bar_chart.csv", index=False)

## Stacked area chart data

In [21]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

stocks = ["AAPL", "AMZN", "GOOGL", "IBM", "JPM"]

# For rows dated 2017 or later, add "Volume Perc": each row's Volume divided by
# the summed Volume of all selected tickers on the same Date.
df = (
    pd.read_csv(url)
    .loc[lambda raw: raw["Name"].isin(stocks), ["Date", "Name", "Volume"]]
    .assign(Date=lambda kept: pd.to_datetime(kept["Date"]))
    .loc[lambda kept: kept["Date"].dt.year >= 2017]
    .assign(
        # The column name contains a space, so it is passed via dict unpacking.
        **{
            "Volume Perc": lambda recent: (
                recent["Volume"]
                / recent.groupby("Date")["Volume"].transform("sum")
            )
        }
    )
)

In [23]:
# Write the current `df` to the stacked area chart CSV; index=False omits the index.
df.to_csv("data/stacked_area_chart.csv", index=False)

## Pie/Donut chart data

In [24]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

stocks_filter = ["AAPL", "JPM", "GOOGL", "AMZN", "IBM"]

# One row per ticker holding its Volume summed over every row in the file.
df = (
    pd.read_csv(url)
    .loc[lambda raw: raw["Name"].isin(stocks_filter), ["Name", "Volume"]]
    .groupby("Name")
    .sum()
    .reset_index()
)

In [25]:
# Write the current `df` to the donut chart CSV; index=False omits the index.
df.to_csv("data/donut_chart.csv", index=False)

## Histogram data

In [32]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

stocks_filter = ["GOOGL"]

# Keep the Name, Date and Close columns for the selected ticker(s).
df = pd.read_csv(url).loc[
    lambda raw: raw["Name"].isin(stocks_filter),
    ["Name", "Date", "Close"],
]

In [33]:
# Write the current `df` to the histogram CSV; index=False omits the index.
df.to_csv("data/histogram.csv", index=False)

## Scatter plot data

In [36]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

stocks_filter = ["GOOGL", "AMZN"]

# Rows for the selected tickers dated 2017 or later, plus a Return column
# computed as (Close - Open) / Open. Date is left as read from the CSV.
df = (
    pd.read_csv(url)
    .loc[
        lambda raw: raw["Name"].isin(stocks_filter)
        & pd.to_datetime(raw["Date"]).dt.year.ge(2017),
        ["Date", "Name", "Open", "Close"],
    ]
    .assign(Return=lambda px: px["Close"].sub(px["Open"]).div(px["Open"]))
)

In [39]:
# Write the current `df` to the scatter plot CSV; index=False omits the index.
df.to_csv("data/scatter_plot.csv", index=False)

In [None]:
import matplotlib.ticker as mticker

fig, ax = plt.subplots(figsize=(6, 6))

df_wide = df.pivot(index="Date", columns="Name", values="Return")
ax = df_wide.plot.scatter(
    x="GOOGL",
    y="AMZN",
    title="Daily returns - GOOGL vs. AMZN",
    ax=ax
)

ax.yaxis.set_major_formatter(mticker.PercentFormatter(1))
ax.xaxis.set_major_formatter(mticker.PercentFormatter(1))

fig

## Box plot data

In [41]:
# Source CSV with daily stock data; fetched over the network.
url = "https://raw.githubusercontent.com/szrlee/Stock-Time-Series-Analysis/master/data/all_stocks_2006-01-01_to_2018-01-01.csv"

stocks = ["AMZN", "GOOGL", "IBM", "JPM"]

# Rows for the selected tickers dated in 2016, with a Return column computed
# as (Close - Open) / Open and Date parsed into datetimes.
df = (
    pd.read_csv(url)
    .loc[
        lambda raw: raw["Name"].isin(stocks)
        & pd.to_datetime(raw["Date"]).dt.year.eq(2016),
        ["Date", "Name", "Close", "Open"],
    ]
    .assign(
        Return=lambda px: px["Close"].sub(px["Open"]).div(px["Open"]),
        Date=lambda px: pd.to_datetime(px["Date"]),
    )
)

In [42]:
# Write the current `df` to the box plot CSV; index=False omits the index.
df.to_csv("data/box_plot.csv", index=False)