In [None]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import scipy.stats as stats

To deal with columns of different types (column 1 holds the dates as strings), we use pandas.
You could use numpy instead, with a little extra work.

In [None]:
# Read the flu table into a DataFrame: the first line of the file is skipped
# and a single quote is used as the quoting character.
filename = "flu.csv"
data = pd.read_csv(
    filename,
    skiprows=1,
    quotechar='\'',
)

In [None]:
# Keep the column headers as a plain Python list so they can be indexed and
# sliced in the cells below.
columns = data.columns.tolist()
print(columns)

(N.B. The standard error doesn't have much meaning for the time-stamped data here!)

In [None]:
# Print the mean and the standard error of the mean for every column except
# the first one, which is skipped.
for col in columns[1:]:
    values = data[col]
    mean, error = np.mean(values), stats.sem(values)
    print(f"{col:9} {mean:.2f} +/- {error:.2f}")

In [None]:
# The region columns are everything between the first and the last column.
regions = columns[1:-1]
print(regions)

In [None]:
# Pick one region column at random and show the distribution of its values.
# np.random.randint excludes its upper bound, so the bound has to be
# len(columns) - 1 for the last region (index len(columns) - 2) to be
# selectable; index 0 and the final column are never chosen.
random_col = np.random.randint(1, len(columns) - 1)
col = columns[random_col]
print(random_col)

plt.hist(data[col])
plt.title(col)  # say which region the histogram shows
plt.xlabel("Fluiness")
plt.ylabel("Count")
plt.show()

In [None]:
# Overlay a Gaussian kernel density estimate for every region, evaluated on
# 100 evenly spaced points between that region's smallest and largest value.
for region in regions:
    series = data[region]
    x = np.linspace(min(series), max(series), 100)
    kde = stats.gaussian_kde(series)
    density = kde(x)
    plt.plot(x, density, linewidth=2, label=region)
    plt.fill_between(x, density, alpha=0.4)
plt.xlabel("Fluiness")
plt.ylabel("KDE")
plt.legend()
plt.show()

In [None]:
# Plot, for every region, its value at 20 evenly spaced quantile levels
# from 0.05 to 1.0.
x = np.linspace(0.05, 1.0, 20)
for region in regions:
    # Deliberately not called `set`: in a notebook that name would shadow the
    # builtin for every cell executed afterwards.
    values = data[region]
    q = np.quantile(values, x)
    # nb: change units, just in plot quantile -> percentile
    plt.plot(x, q, label=region)
plt.ylabel("Fluiness")
plt.xlabel("Quantile")
plt.legend()
plt.show()

In [None]:
# Quantile-quantile plot of two regions: the quantiles of col1 go on the
# x-axis and the quantiles of col2 on the y-axis.
col1 = "MidAtl"
col2 = "SAtl"
x = np.linspace(0.05, 1.0, 20)
q1 = np.quantile(data[col1], x)
q2 = np.quantile(data[col2], x)
# Common axis range covering both regions, so the plot can be square.
axmin = min(min(data[col1]), min(data[col2]))
axmax = max(max(data[col1]), max(data[col2]))
# Reference line y = x: points on it mean the two quantiles are equal.
plt.axline((0.5, 0.5), slope=1, color="r")
plt.plot(q1, q2, "-", linewidth=2)
plt.plot(q1, q2, "k.")
plt.gca().set_aspect('equal')
plt.ylim(0.95*axmin, 1.05*axmax)
plt.xlim(0.95*axmin, 1.05*axmax)
# q1 (col1) is the x data and q2 (col2) the y data, so label accordingly.
plt.xlabel(col1 + " fluiness")
plt.ylabel(col2 + " fluiness")
plt.show()

### Tukey mean-difference plot

Given two quantiles, $q_1(x)$ and $q_2(x)$, 
the Tukey mean difference plot is a plot of $Y$ vs. $X$, where:
$$ Y(x) = q_2(x) - q_1(x) $$
$$ X(x) = \frac{1}{2}\left[ q_1(x) + q_2(x) \right] $$
That is, it plots the difference of the quantiles against their average. The advantage of the Tukey mean-difference plot over the q-q plot is that it replaces the interpretation of differences around a 45-degree diagonal line with the interpretation of differences around a horizontal zero line.

In [None]:
# Tukey mean-difference plot: the difference of the two quantile sets
# against their average (q1 and q2 come from the q-q plot cell).
x = 0.5*(q1+q2)
y = q2-q1
plt.plot(x, y, "-", linewidth=2)
plt.plot(x, y, "k.")
# Horizontal zero reference line. axhline is used instead of an axline
# through an arbitrary anchor point, so the reference line cannot influence
# the automatic x-axis limits.
plt.axhline(0.0, color="r")
plt.xlabel("Mean of quantiles, (q1 + q2) / 2")
plt.ylabel("Difference of quantiles, q2 - q1")
plt.show()

(Technically this is time-series data, not just univariate data.)

So, do this relative to 1/1/2006 (the offsets will run from negative to positive).

In [None]:
from datetime import datetime

# Convert the "Date" strings (month/day/year) into datetime objects, then
# plot the MidAtl column against them.
DATE_FORMAT = '%m/%d/%Y'
dates = [datetime.strptime(text, DATE_FORMAT) for text in data["Date"]]

plt.plot(dates, data["MidAtl"])
plt.show()

In [None]:
import seaborn as sns

# Restrict the table to the region columns and draw it as a heatmap
# (one heatmap row per table row, one heatmap column per region).
region_data = data.loc[:, regions]

ax = sns.heatmap(data=region_data)
plt.show()

In [None]:
# Same heatmap with rows and columns swapped: one row per region.
ax = sns.heatmap(data=region_data.T)
plt.show()

In [None]:
# Build one "day-Mon-yy" tick label per date (day of month without zero
# padding, abbreviated month name, two-digit year).
labels = [
    f"{date.day}-{date.strftime('%b')}-{date.strftime('%y')}" for date in dates
]

print(labels)

# Use the formatted labels built above for the x ticks, rather than the raw
# datetime objects.
ax = sns.heatmap(region_data.transpose(), xticklabels=labels)
plt.show()

In [None]:
# One tick label per date, but only every 10th date gets a "day-Mon-yy"
# string; all the others get an empty label.
labels = [
    "-".join([str(date.day), date.strftime("%b"), date.strftime("%y")])
    if i % 10 == 0
    else ""
    for i, date in enumerate(dates)
]

print(labels)

ax = sns.heatmap(region_data.transpose(), xticklabels=labels)
plt.show()

In [None]:
# Show the names of the available colormaps.
print(list(plt.colormaps))

# Redraw the per-region heatmap with the "icefire" colormap, a title and
# rotated tick labels.
ax = sns.heatmap(
    region_data.transpose(),
    xticklabels=labels,
    cmap="icefire",
)
ax.set_title("Fluiness")
ax.set_xticklabels(labels, rotation=50)
plt.show()

### 6. Bar charts

In [None]:
# Two stacked panels: vertical bars for the NE column on top, horizontal
# bars for the Mtn column below, both positioned by date.
fig, axs = plt.subplots(nrows=2, ncols=1)
top_ax, bottom_ax = axs
top_ax.bar(dates, data["NE"], align='center', width=5)
bottom_ax.barh(dates, data["Mtn"], align='center', height=5)
plt.show()