# Learning objectives

1. Discuss the Lott *et al.* RNA-seq data
1. Introduce pandas

# Load packages 

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# All FPKMs

## Load data

In [None]:
# Read the FPKM table from CSV into a DataFrame (the path is relative to the
# notebook's working directory).
df_fpkms = pd.read_csv( "../../qbb2019/data/all.csv" )
# A bare name on the last line of a cell makes the notebook render it.
df_fpkms

In [None]:
# Summary statistics for the table; for a frame with mixed dtypes pandas
# reports only the numeric columns by default.
df_fpkms.describe()

## Plot distributions

In [None]:
# Convert the entire DataFrame (every column) to a NumPy array.
data = df_fpkms.to_numpy()
# Show the type of the converted object.
type( data )

In [None]:
# Row 1 (the second row), all columns, of the array built from the whole
# DataFrame -- no columns have been dropped at this point.
data[1,:]

In [None]:
# Rebuild `data` from column 2 onward only, skipping the first two columns
# (presumably the t_name / gene_name identifier columns -- confirm against
# the CSV header).
data = df_fpkms.iloc[:,2:].to_numpy()
# Same row as in the previous cell, now without the first two columns.
data[1,:]

In [None]:
# Box plot on a single set of axes: a 2-D array yields one box per column.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.boxplot(x=data)
plt.show()

## Label x-ticks

In [None]:
# Column labels from position 2 onward -- the same column range that was
# converted into `data`.
sample_names = df_fpkms.columns[2:]
sample_names

In [None]:
# Same box plot as before, with each box labelled by its sample column name;
# labels are rotated 90 degrees.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.boxplot(x=data)
ax.set_xticklabels(labels=sample_names, rotation=90)
plt.show()

# Sxl FPKMs

## Subset

In [None]:
# Pull out the gene_name column as a Series so it can be compared row-wise.
gene_name = df_fpkms["gene_name"]
gene_name

In [None]:
# Element-wise comparison: a boolean Series, True where gene_name is "Sxl".
gene_name == "Sxl"

In [None]:
# Boolean mask ("rows of interest"): True where gene_name is "Sxl".
roi = gene_name.eq("Sxl")
# Keep only the rows selected by the mask.
df_sxl = df_fpkms.loc[roi]
df_sxl

In [None]:
# Summary statistics restricted to the Sxl rows.
df_sxl.describe()

## Plot distributions

In [None]:
# Drop the first two columns of the Sxl subset; the result is still a pandas
# object, which the type() call below makes visible.
data_sxl = df_sxl.iloc[:,2:]
type( data_sxl )

In [None]:
# Rebind data_sxl to its NumPy array form (re-running this cell without the
# previous one fails, since an ndarray has no to_numpy method).
data_sxl = data_sxl.to_numpy()
type( data_sxl )

In [None]:
# One box per sample column, each summarising the Sxl rows only.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.boxplot(x=data_sxl)
ax.set_xticklabels(labels=sample_names, rotation=90)
plt.show()

## Subplots

In [None]:
# Two side-by-side panels: the first 8 sample columns on the left, the
# remaining columns on the right.
fig, (ax1, ax2) = plt.subplots(ncols=2)
for panel, cols in ((ax1, slice(None, 8)), (ax2, slice(8, None))):
    panel.boxplot(data_sxl[:, cols])
    panel.set_xticklabels(sample_names[cols], rotation=90)
# The y-axes are independent here, so only the left panel gets a fixed range.
ax1.set_ylim(0, 120)
plt.show()

In [None]:
# Same two-panel layout, but sharey=True makes both panels use one y-range,
# so no manual set_ylim is needed.
fig, (ax1, ax2) = plt.subplots(ncols=2, sharey=True)
for panel, cols in ((ax1, slice(None, 8)), (ax2, slice(8, None))):
    panel.boxplot(data_sxl[:, cols])
    panel.set_xticklabels(sample_names[cols], rotation=90)
plt.show()

# Line plots

## Find highest

In [None]:
# Sort the Sxl rows by their female_14D value; sort_values is ascending by
# default, so the largest value ends up in the last row.
df_sxl.sort_values("female_14D")

In [None]:
# Rebind roi: it is now a mask over df_sxl's rows selecting this t_name
# (the earlier roi was the gene-name mask over df_fpkms).
roi = df_sxl["t_name"] == "FBtr0331261"
df_sxl[roi]

## Plot

In [None]:
# to_numpy() on a DataFrame slice gives a 2-D array: one row per selected
# transcript, one column per sample.
y = df_sxl[roi].iloc[:,2:].to_numpy()

fig, ax = plt.subplots()
# NOTE(review): sample_names is 1-D (one entry per sample column) while y's
# first axis counts the selected rows, so unless those lengths happen to be
# equal this call presumably fails with a shape-mismatch error. The
# following cell transposes y -- confirm this failure is intentional.
ax.plot( sample_names, y )
plt.show()

In [None]:
# Transpose so that y's first axis runs over the sample columns, matching
# the length of sample_names; plot() then draws one line per column of y.
y = df_sxl[roi].iloc[:, 2:].to_numpy().transpose()

fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(sample_names, y)
plt.show()

In [None]:
# Take the first selected row as a 1-D array (row 0, columns 2 onward).
y = df_sxl[roi].iloc[0].iloc[2:].to_numpy()

fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(sample_names, y)
# Rotate the existing tick labels through pyplot; unlike ax.set_xticklabels,
# this does not need the label texts passed in again.
plt.xticks(rotation=90)
plt.show()

## Facet

In [None]:
# Stage labels shared by both panels.
stages = ["10", "11", "12", "13", "14A", "14B", "14C", "14D"]
# Assumes the first 8 sample columns are the male samples and the remaining
# ones the female samples, in the stage order above -- TODO confirm against
# the CSV header.
y_male, y_female = y[:8], y[8:]

# Stacked panels with a common y-axis so the two sexes compare directly.
fig, (ax1, ax2) = plt.subplots(nrows=2, sharey=True)
for panel, values in ((ax1, y_male), (ax2, y_female)):
    panel.plot(stages, values)
plt.show()

## Series

In [None]:
# Combine the male and female arrays as the two rows of one 2-D array.
y_series = np.array( [y_male, y_female] )
y_series

In [None]:
# y_series holds one row per sex; transposed, each sex becomes a column and
# plot() draws one line per column.
fig, ax = plt.subplots(nrows=1, ncols=1)
lines = ax.plot(stages, y_series.transpose())
# Pair each line with its label explicitly (row order: male, then female).
ax.legend(lines, ["male", "female"])
plt.show()

# Scatterplots

## x-y

In [None]:
# log2(value + 1) for two samples; the +1 keeps zeros finite (log2(1) == 0).
# NOTE: this rebinds `y`, which the line-plot cells above used for the Sxl
# transcript values.
x = np.log2(1 + df_fpkms["male_10"])
y = np.log2(1 + df_fpkms["male_11"])

# Markers only, no connecting line: one point per row of the table.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(x, y, linestyle="None", marker=".")
plt.show()

## m-a

In [None]:
# M is the difference of the two log2 values (i.e. a log2 ratio);
# A is their mean.
m = x - y
a = (x + y) / 2

# Markers only: A on the horizontal axis, M on the vertical axis.
fig, ax = plt.subplots(nrows=1, ncols=1)
ax.plot(a, m, linestyle="None", marker=".")
plt.show()