# Hyper

A script to detect the most hyped (popular) bands of a festival

In [1]:
# https://www.dataquest.io/blog/python-pandas-databases/

In [22]:
import sqlite3
import pandas as pd
import string
import unicodedata

## Load Data

In [3]:
# Path of the SQLite database file used by this notebook
sqlite_file = 'hyper.db'

# Open the database, then create a cursor on that connection
connection = sqlite3.connect(sqlite_file)
db = connection.cursor()

In [68]:
# Load the band catalogue into a dataframe (the query caps it at 10 rows)
bands = pd.read_sql_query(sql="SELECT * FROM Bands limit 10;", con=connection)
bands.head(5)

Unnamed: 0,id,name,codedName,twitterName
0,1,Arcade Fire,arcadefire,@arcadefire
1,2,Bon Iver,boniver,@boniver
2,3,Mishima,mishima,@mishima
3,4,!!!,!!!,@chkchkchk
4,5,Anímic,animic,@animic


In [59]:
# Load the raw tweets into a dataframe (the query caps it at 10 rows)
tweets = pd.read_sql_query(sql="SELECT * FROM TweetsRaw limit 10;", con=connection)
# Quick inspection helpers, left disabled: tweets.head() / tweets.describe()

## Partition per band

Find the tweets that mention each band and restructure the data in a band-centered way: one row per (tweet, band) pair.

In [111]:
def _strip_accents(text):
    """Return `text` without combining marks (Unicode category 'Mn'), e.g. 'anímic' -> 'animic'."""
    decomposed = unicodedata.normalize('NFD', text)
    return ''.join(c for c in decomposed if unicodedata.category(c) != 'Mn')


def extract_bands(tweet, band_catalog=None):
    """
    Extract the bands mentioned in a tweet.

    A band counts as mentioned when any of its written forms appears as a
    substring of the lower-cased tweet text. The forms tried are the band name
    in lowercase, without whitespace, without accents, without both, and the
    band's Twitter handle (also lower-cased, so the comparison is
    case-insensitive on both sides).

    Parameters
    ----------
    tweet : mapping or pandas.Series
        Must provide a 'tweetText' entry.
    band_catalog : pandas.DataFrame, optional
        Bands to look for, with columns 'id', 'name', 'codedName' and
        'twitterName'. Defaults to the notebook-level `bands` dataframe.

    Returns
    -------
    list of dict
        One {"id": ..., "codedName": ...} dict per matched band, in catalog
        order. Empty when the tweet text is missing.
    """
    catalog = bands if band_catalog is None else band_catalog

    # A missing text (None / NaN) cannot mention anything
    text = tweet['tweetText']
    if not isinstance(text, str):
        return []
    # Lower-case once here instead of once per written form of every band
    text = text.lower()

    bands_in_tweet = []

    # loop all bands and check if any of the written forms is present in the tweet text
    for _, b in catalog.iterrows():

        # collect the different ways the band may be written (a set drops duplicates)
        forms = set()

        bandname = b['name']
        if isinstance(bandname, str):
            lowered = bandname.lower()
            squeezed = ''.join(lowered.split())
            forms.update((lowered, squeezed, _strip_accents(lowered), _strip_accents(squeezed)))

        # the handle may be missing; it must be lower-cased to match the lower-cased text
        handle = b['twitterName']
        if isinstance(handle, str):
            forms.add(handle.strip().lower())

        # an empty string is a substring of every text and would match all tweets
        forms.discard('')

        if any(form in text for form in forms):
            bands_in_tweet.append({"id": b['id'], "codedName": b['codedName']})

    return bands_in_tweet

In [112]:
# Attach to every tweet the list of bands it mentions
tweets['bands'] = tweets.apply(extract_bands, axis='columns')

In [158]:
def band_partition(tweet):
    """
    Split one tweet into per-band records.

    For every band listed under tweet['bands'], one dict combining the tweet's
    fields with that band's id and coded name is appended to the notebook-level
    list `band_tweets_list`. A tweet mentioning 2 bands therefore adds 2 dicts.
    Nothing is returned.
    """
    # tweet columns copied unchanged (same key) after the band-specific fields
    copied_columns = (
        'favsCount',
        'rtsCount',
        'language',
        'userId',
        'userFriendsCount',
        'userFollowersCount',
        'userStatusesCount',
        'userFavsCount',
        'userLocation',
    )

    for band in tweet['bands']:
        # renamed tweet fields and band fields first, to keep the key order stable
        record = {
            "tweetRawId": tweet['id'],
            "createdAt": tweet['createdAt'],
            "storedAt": tweet['storedAt'],
            "bandId": band['id'],
            "bandCodedName": band['codedName'],
        }
        record.update((column, tweet[column]) for column in copied_columns)
        band_tweets_list.append(record)

In [163]:
# Start from an empty accumulator and let band_partition fill it, one tweet row at a time
band_tweets_list = []
tweets.apply(band_partition, axis='columns');

# Turn the accumulated per-band records into a dataframe
band_tweets = pd.DataFrame(band_tweets_list)
band_tweets.head()

Unnamed: 0,bandCodedName,bandId,createdAt,favsCount,language,rtsCount,storedAt,tweetRawId,userFavsCount,userFollowersCount,userFriendsCount,userId,userLocation,userStatusesCount
0,arcadefire,1,Mon May 01 10:58:45 +0200 2017,79,la,87,Mon May 01 10:58:45 +0200 2017,2,6825,416,0,522,Barcelona,7438
1,mishima,3,Mon May 01 10:58:45 +0200 2017,79,la,87,Mon May 01 10:58:45 +0200 2017,2,6825,416,0,522,Barcelona,7438
2,mishima,3,Mon May 01 10:58:45 +0200 2017,40,la,2,Mon May 01 10:58:45 +0200 2017,4,4262,784,0,203,Barcelona,2471
3,arcadefire,1,Mon May 01 10:58:45 +0200 2017,4,la,91,Mon May 01 10:58:45 +0200 2017,5,225,164,7,894,Barcelona,9958
4,mishima,3,Mon May 01 10:58:45 +0200 2017,4,la,91,Mon May 01 10:58:45 +0200 2017,5,225,164,7,894,Barcelona,9958
