In [None]:
%matplotlib inline

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sqlite3 as sql

In [None]:
# Open the SQLite database and load the whole Reviews table into pandas.
# try/finally guarantees the connection is closed even when the query fails
# (e.g. the table is missing); sqlite3's `with conn:` form only manages
# transactions and would leave the connection open.
conn = sql.connect("sbtb.db")
try:
    df = pd.read_sql_query("SELECT * from Reviews;", conn)
finally:
    conn.close()

# Quick look at the first rows to confirm the load worked
print(df.head())

In [None]:
# Reviews per reviewer, sorted ascending by count so the longest bar is
# drawn last (topmost) in the horizontal bar chart.
reviewers = df[['Reviewer', 'Id']]
reviewer_count = reviewers.groupby('Reviewer').count().sort_values('Id')

# One bar slot per reviewer
y_pos = np.arange(len(reviewer_count))

fig, ax = plt.subplots(figsize=(8.5, 5))
ax.set_frame_on(False)

# Bars, with each reviewer's name as the tick label
ax.barh(y_pos, reviewer_count['Id'], align='center', color='grey', lw=0)
ax.set_yticks(y_pos)
ax.set_yticklabels(reviewer_count.index)

# y axis: labels only, no tick marks
ax.set_ylabel('Reviewer', size=14)
ax.yaxis.set_ticks_position('none')
ax.yaxis.set_tick_params(labelsize=12)

# x axis: thick grey ticks along the bottom edge
ax.set_xlabel('# reviews', size=14)
ax.xaxis.set_ticks_position('bottom')
ax.xaxis.set_tick_params(width=2, length=7, color='grey', labelsize=12)

plt.show()


---

## Rendering grades

For proper rendering of column names, grades should be converted to numbers, which are saved in a new column ```grade_num```. We also make an inverted grade dictionary (number → grade label) with ```-1``` set to 'Misc' for plotting ease.

Let's use a 5-point scale (A = 5 down to F = 1), adding or subtracting 0.3 for a plus or minus.

In [None]:
# Numeric score assigned to each review grade.
# Letter grades sit on a 5-point scale with +/- 0.3 for plus and minus,
# 'DNF' scores 0, and the non-letter categories all share the sentinel -1.
grade_dict = dict([
    ('A+', 5.3), ('A', 5.0), ('A-', 4.7),
    ('B+', 4.3), ('B', 4.0), ('B-', 3.7),
    ('C+', 3.3), ('C', 3.0), ('C-', 2.7),
    ('D+', 2.3), ('D', 2.0), ('D-', 1.7),
    ('F+', 1.3), ('F', 1.0),
    ('DNF', 0.),
    ('Rant', -1), ('Squee', -1), ('N/A', -1),
])

# Reverse lookup (score -> label), used for axis tick labels. The three -1
# categories collapse onto one key, which is relabelled 'Misc'.
grade_dict_invert = dict(zip(grade_dict.values(), grade_dict.keys()))
grade_dict_invert[-1] = 'Misc'

In [None]:
# Translate every grade into its numeric score in one vectorised pass.
# Series.map looks each 'Grade' value up in grade_dict; grades that are not
# in the dictionary become NaN. The whole column is rebuilt on each run, so
# re-executing this cell is idempotent.
df["grade_num"] = df["Grade"].map(grade_dict)

In [None]:
# Number of reviews per numeric grade. groupby sorts the index ascending,
# so bars run from 'Misc' (-1) at the bottom up to the highest grade on top.
grades = df[['grade_num', 'Id']]
grades_count = grades.groupby('grade_num').count()

# One bar slot per distinct grade
y_pos = np.arange(len(grades_count))
# Turn the numeric index back into readable grade labels for the y axis
y_labels = [grade_dict_invert[k] for k in grades_count.index]

fig = plt.figure(figsize=(8.5, 6))
ax = fig.add_subplot(111)
ax.set_frame_on(False)

ax.barh(y_pos, grades_count['Id'], align='center', color='grey', lw=0)
ax.set_yticks(y_pos)
ax.set_yticklabels(y_labels)

ax.set_xlabel('# reviews', size=14)
ax.set_ylabel('Review grade', size=14)

# y axis: labels only, no tick marks
ax.yaxis.set_ticks_position('none')
ax.yaxis.set_tick_params(labelsize=12)

# x axis: thick grey ticks along the bottom edge
ax.xaxis.set_ticks_position('bottom')
ax.xaxis.set_tick_params(width=2, length=7, color='grey', labelsize=12)

# Render explicitly, as the reviewer-count chart does, so the figure is also
# shown when the code runs outside the inline backend
plt.show()

Now we want to look at the grade distribution for each reviewer.
I would expect the Guest Reviews to be bimodal (either very positive or very negative).

In [None]:
# One bar chart of the grade distribution per reviewer.
# Bars follow the numeric ordering defined by grade_dict (lowest score first)
# rather than groupby's alphabetical order, which would put 'A' before 'A+'.
grade_order = sorted(grade_dict, key=grade_dict.get)

reviewers = df['Reviewer'].unique()
for reviewer in reviewers:
    grades = df[df['Reviewer'] == reviewer]
    grades = grades[['Grade', 'Id']]
    grades_count = grades.groupby('Grade').count()
    if grades_count.empty:
        # Nothing to draw (e.g. a missing reviewer name matches no rows);
        # plotting an empty frame would raise
        continue

    # Known grades in score order, then any grade grade_dict does not list
    ordered = [g for g in grade_order if g in grades_count.index]
    ordered += [g for g in grades_count.index if g not in grade_dict]
    grades_count = grades_count.loc[ordered]

    reviewer_ax = grades_count.plot(kind='bar', legend=False, grid=True, title=reviewer)
    reviewer_ax.set_xlabel('Grade')
    reviewer_ax.set_ylabel('# reviews')
    plt.show()
    # Release the figure so a long reviewer list does not pile up open figures
    plt.close(reviewer_ax.figure)