In [1]:
import numpy as np
import pandas as pd
import psycopg2
import itertools
import matplotlib.pyplot as plt
from sklearn.manifold import MDS

In [2]:
# Connect to the local Postgres database of comments ("reddit_data").
# `conn` and `cur` are module-level handles: the next cells run a query through
# `cur` and then close both.
conn = psycopg2.connect("dbname=reddit_data user=brendan")
cur = conn.cursor() 

In [3]:
# Get the total number of comments in the db. NUM_TOTAL is the denominator used
# to turn comment counts into probabilities in the lift calculation.
# cursor.execute() has no useful return value, so it is not assigned; COUNT(*)
# yields exactly one row with one column, so fetchone() is enough.
cur.execute("SELECT COUNT(*) FROM comments")
NUM_TOTAL = cur.fetchone()[0]  # 330762 comments in total when this was last run

In [4]:
# Close the db connection: cursor first, then the connection that owns it.
# NUM_TOTAL is the only value read from the database in the cells shown here.
cur.close()
conn.close()

In [5]:
# Load co-mention data.
# The first CSV column becomes the index (dates in the preview below); the other
# columns hold comment counts per phone and per "<phone> <phone>" pair.
num_comments_by_phone = pd.read_csv("/Users/brendan/Desktop/sabnis_research/code/data/reddit_comments.csv", index_col=0)

In [6]:
# Preview the first rows to check the index and the single-phone / pair columns.
num_comments_by_phone.head()

Unnamed: 0,iphone,galaxy,htc,lg,pixel,iphone galaxy,iphone htc,iphone lg,iphone pixel,galaxy htc,galaxy lg,galaxy pixel,htc lg,htc pixel,lg pixel
2016-09-04,5125.0,833.0,965.0,582.0,1877.0,210.0,131.0,67.0,370.0,80.0,43.0,82.0,89.0,156.0,59.0
2016-09-11,3163.0,575.0,596.0,470.0,1352.0,139.0,96.0,57.0,172.0,50.0,35.0,59.0,68.0,100.0,41.0
2016-09-18,5917.0,1742.0,1802.0,1189.0,4853.0,554.0,316.0,193.0,1576.0,109.0,100.0,348.0,207.0,477.0,244.0
2016-09-25,3703.0,1562.0,849.0,1042.0,3139.0,239.0,129.0,121.0,573.0,72.0,69.0,133.0,116.0,199.0,187.0
2016-10-02,3384.0,746.0,702.0,791.0,3384.0,135.0,107.0,74.0,657.0,39.0,41.0,136.0,54.0,224.0,118.0


In [8]:
# Use Lift normalization for our dissimilarity measure.
# Lift = P(A,B) / (P(A) x P(B))

# Phone names. They match the single-phone column names of the co-mention data
# and fix the row/column order of the lift matrix and of the plot legend.
PHONES = ['iphone', 'galaxy', 'htc', 'lg', 'pixel']

def get_lifted(date, num_comments_by_phone, num_total=None, phones=None):
    """Build the symmetric inverse-lift dissimilarity matrix for one row of counts.

    For every unordered pair of phones (A, B):

        lift(A, B) = P(A, B) / (P(A) * P(B))

    where each probability is estimated as ``count / num_total``. The matrix
    stores ``1 / lift`` because sklearn's MDS expects dissimilarities (larger
    means further apart), whereas lift grows with similarity. The diagonal is
    left at 0.

    Parameters
    ----------
    date : label
        Index label selecting the row of ``num_comments_by_phone`` to use.
    num_comments_by_phone : pandas.DataFrame
        One column per phone plus one column per pair, named ``"<a> <b>"`` with
        the two names in the same relative order as in ``phones``.
    num_total : number, optional
        Denominator used to turn counts into probabilities. Defaults to the
        module-level ``NUM_TOTAL``.
    phones : sequence of str, optional
        Phone names, in matrix order. Defaults to
        ``['iphone', 'galaxy', 'htc', 'lg', 'pixel']``.

    Returns
    -------
    pandas.DataFrame
        Square frame indexed and labelled by ``phones`` holding ``1 / lift``.

    Notes
    -----
    No special handling is applied to zero counts: a pair with zero co-mentions
    has lift 0, so its inverse is not finite.
    """
    if phones is None:
        phones = ['iphone', 'galaxy', 'htc', 'lg', 'pixel']
    else:
        phones = list(phones)
    if num_total is None:
        num_total = NUM_TOTAL

    size = len(phones)
    lift_matrix = pd.DataFrame(np.zeros((size, size)), index=phones, columns=phones)

    for first, second in itertools.combinations(phones, 2):
        pair = first + ' ' + second  # column name of the co-mention count

        # Lift calculations
        pab = num_comments_by_phone.loc[date, pair] / num_total
        pa = num_comments_by_phone.loc[date, first] / num_total
        pb = num_comments_by_phone.loc[date, second] / num_total
        lift = pab / (pa * pb)

        # Write with a single .loc call per cell. Chained indexing
        # (frame[col][row] = value) assigns into a temporary object and does
        # not update the frame under pandas Copy-on-Write.
        dissimilarity = 1 / lift
        lift_matrix.loc[first, second] = dissimilarity
        lift_matrix.loc[second, first] = dissimilarity  # keep the matrix symmetric

    return lift_matrix

In [33]:
# Apply MDS mapping to the co-mention data for every week and save the 2-D
# coordinates of each phone in the following lists (one entry per week).
iphone_coords = []
galaxy_coords = []
htc_coords = []
lg_coords = []
pixel_coords = []

# Same order as the rows of the lift matrix returned by get_lifted.
coords_per_phone = (iphone_coords, galaxy_coords, htc_coords, lg_coords, pixel_coords)

# MDS starts from a random configuration; fixing the seed makes a
# Restart & Run All reproduce the same coordinates.
MDS_SEED = 0

for date in num_comments_by_phone.index:
    lift_matrix = get_lifted(date, num_comments_by_phone)

    # MDS mapping with SKLearn on the precomputed dissimilarity matrix.
    # NOTE(review): each week is fitted independently, and MDS only uses pairwise
    # distances, so successive embeddings are presumably comparable only up to a
    # rotation/reflection -- confirm before reading week-to-week motion literally.
    embedding = MDS(n_components=2, dissimilarity='precomputed', random_state=MDS_SEED)
    coords = embedding.fit_transform(lift_matrix)

    # Row k of `coords` is the 2-D position of the k-th phone.
    for phone_row, phone_list in zip(coords, coords_per_phone):
        phone_list.append(phone_row)

# Arrays of per-week coordinates for each phone, used by the plotting cell.
ic = np.array(iphone_coords)
gc = np.array(galaxy_coords)
hc = np.array(htc_coords)
lc = np.array(lg_coords)
pc = np.array(pixel_coords)

In [77]:
# Create and save graphs of MDS coordinates, one image per week.
# Connect five points at a time so the graph doesn't get too cluttered (think of the classic game Snake):
# frame i shows weeks i-4 .. i, so the first frame covers weeks 0..4 and the
# last frame ends on the final week.
# Use http://gifmaker.me/ to create the gif

phone_coords = np.array([ic,gc,hc,lc,pc])
colors = ['b', 'g', 'r', 'c', 'm'] # iPhone -> blue, Galaxy -> green, HTC -> red, LG -> cyan, Pixel -> magenta
img_save_path = '/Users/brendan/Desktop/sabnis_research/smartphone-comention-analysis/mds_imgs/'
TRAIL_LENGTH = 5  # number of consecutive weeks drawn in each frame

for i in range(TRAIL_LENGTH - 1, len(ic)):
    save_name = str(i).zfill(4) + '.png'

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_xlim(-0.1, 0.1)
    ax.set_ylim(-0.1, 0.1)

    # Never let the window start below 0: a negative index would wrap around
    # and pull the last weeks of the series into the first frame.
    start = i - (TRAIL_LENGTH - 1)

    for name, phone, color in zip(PHONES, phone_coords, colors):
        trail = phone[start:i + 1]
        # Points of the window, labelled so the legend does not depend on the
        # order in which artists were created.
        ax.scatter(trail[:, 0], trail[:, 1], c=color, label=name)
        # Line connecting consecutive points of the window.
        ax.plot(trail[:, 0], trail[:, 1], color=color, lw=1)

    ax.legend()
    fig.savefig(img_save_path + save_name)
    plt.close(fig)  # free the figure; many frames are generated in this loop