## Setup

Welcome! This notebook combines text blocks (like this one) and interactive code blocks. To run a code block, put your cursor anywhere in the block to select it and then click "Run" at the top. Output for that block will appear just below it. 

You can edit or delete any block, as well as add new blocks --- feel free to play around with the code; that's the best way to learn!

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the survey CSV straight from GitHub (needs network access).
# The URL points at the v1.0.0 tag of the repository.
data_url = "https://raw.githubusercontent.com/kendavidn/yaounde_serocovpop_shared/v1.0.0/data/yaounde_covid_seroprev_dataset.csv"
covid_data = pd.read_csv(data_url)

In [None]:
# preview the first five rows to check the data loaded as expected
covid_data.head(n=5)

## Scatterplots

In [None]:
# reset seaborn to its default theme so we start from a known baseline
sns.set_theme()

# basic scatterplot (relplot draws a scatterplot by default: kind="scatter")
sns.relplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    kind="scatter",
)


In [None]:
# map a continuous variable (the val_weight_kg column) to point color
sns.relplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    hue="val_weight_kg",
    kind="scatter",
)



In [None]:
# using color to show groups

# The raw is_smoker answers are long survey codes; swap them for short
# labels so the plot legend stays readable.
orig_codes = [
    "ex_fumeur__j_ai_fum__mais_ne_fume_plus",
    "fumeur__je_fume_actuellement",
    "non_fumeur__je_n_ai_jamais_fum",
]
new_codes = ["ex-smoker", "smoker", "non-smoker"]

# pair each original code with its replacement label
smoker_labels = dict(zip(orig_codes, new_codes))
covid_data["is_smoker"] = covid_data["is_smoker"].replace(smoker_labels)

sns.relplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    hue="is_smoker",
    kind="scatter",
)


In [None]:
# map the same grouping variable to both color and marker shape
# so the groups are easier to tell apart
sns.relplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    hue="is_smoker",
    style="is_smoker",
    kind="scatter",
)


In [None]:
# custom colors: pass a named palette to a single plot
sns.relplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    hue="is_smoker",
    style="is_smoker",
    palette="colorblind",
    kind="scatter",
)

# make the colorblind palette the default for all plots drawn from here on
sns.set_theme(palette="colorblind")

In [None]:
# controlling figure aesthetics

# seaborn has 5 preset themes: darkgrid, whitegrid, dark, white, and ticks
sns.set_style("white")

sns.relplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    hue="is_smoker",
    style="is_smoker",
    kind="scatter",
)


In [None]:
# there are 4 different contexts available: notebook (default), paper, talk, and poster
#
# sns.set_context("poster") would change the context globally for every later
# plot. Here we only want it for one figure, so we use plotting_context as a
# context manager: the "poster" scaling applies inside the `with` block and the
# previous settings are restored automatically afterwards (even if the plot
# call raises), so there is no need to switch back to "notebook" by hand.
with sns.plotting_context("poster"):
    sns.relplot(data=covid_data,
                x="val_age", y="val_height_cm",
                hue="is_smoker", style="is_smoker")

## Histograms

In [None]:
# basic histogram (displot draws a histogram by default: kind="hist")
sns.displot(data=covid_data, x="val_age", kind="hist")


In [None]:
# change the binning: narrow bins, each 1 unit of val_age wide
sns.displot(data=covid_data, x="val_age", binwidth=1, kind="hist")


In [None]:
# change the binning: wide bins, each 10 units of val_age wide
sns.displot(data=covid_data, x="val_age", binwidth=10, kind="hist")


In [None]:
# using color to show groups: one histogram per is_smoker value
sns.displot(data=covid_data, x="val_age", hue="is_smoker", kind="hist")


In [None]:
# using color to show groups, with the groups' bars stacked on top of each other
sns.displot(
    data=covid_data,
    x="val_age",
    hue="is_smoker",
    multiple="stack",
    kind="hist",
)


In [None]:
# jointplot draws the two-variable plot with a marginal histogram on each axis
sns.jointplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
)


## Line plots

In [None]:
# load seaborn's "fmri" example dataset, used for the line plots below
fmri = sns.load_dataset(name="fmri")

In [None]:
# basic line plot: same relplot function, switched to kind="line"
sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    kind="line",
)


In [None]:
# map the event column to both line color and line type to show groups
sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    hue="event",
    style="event",
    kind="line",
)


In [None]:
# using facets: col="region" draws one panel (column) per region value
sns.relplot(
    data=fmri,
    x="timepoint",
    y="signal",
    hue="event",
    style="event",
    col="region",
    kind="line",
)


## Trend lines

In [None]:
# scatterplot with a linear regression trend line
sns.lmplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
)


In [None]:
# adjusting alpha: make the points mostly transparent so the trend line stands out
# (scatter_kws is forwarded to the underlying scatter call)
point_style = {"alpha": .1}
sns.lmplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    scatter_kws=point_style,
)


In [None]:
# polynomial regression trend line: order=2 fits a second-degree polynomial
point_style = {"alpha": .1}
sns.lmplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    order=2,
    scatter_kws=point_style,
)


In [None]:
# lowess curve trend line: a locally weighted smoother instead of a global fit
# NOTE: per the seaborn docs, lowess=True relies on the statsmodels package
point_style = {"alpha": .1}
sns.lmplot(
    data=covid_data,
    x="val_age",
    y="val_height_cm",
    lowess=True,
    scatter_kws=point_style,
)
