In [13]:
import mysql.connector 
from database.connect import getConnection
import config.env

In [14]:
# Lookup table: Twitter user_id of each airline account -> airline name.
# Used further down to recognise tweets that were written by an airline.
airlines = {
    56377143: 'KLM',
    106062176: 'AirFrance',
    18332190: 'British_Airways',
    22536055: 'AmericanAir',
    124476322: 'Lufthansa',
    26223583: 'AirBerlin',
    2182373406: 'AirBerlin assist',
    38676903: 'easyJet',
    1542862735: 'RyanAir',
    253340062: 'SingaporeAir',
    218730857: 'Qantas',
    45621423: 'EtihadAirways',
    20626359: 'VirginAtlantic',
}

In [15]:
# Open the database connection and create the cursor that the cells and
# functions below use to query and edit the SQL tables.
db = getConnection()
cursor = db.cursor()

In [16]:
# Read the whole tweets table, newest tweet first, and keep the rows in `total`.
cursor.execute(
    "SELECT * FROM `tweets` ORDER BY timestamp_ms DESC"
)
total = cursor.fetchall()

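Tables used in this notebook: <br>
tweets: (id, text, in_reply_to_status_id, coordinates, timestamp_ms, verified, followers_count, statuses_count, user_id, language, mentions, airlines, sentiment) <br>
conversations: (id, Tstart, Tend, Airline, Length) – one row per conversation<br>
part_of: (tID, cID, Position) – links tweet tID to conversation cID and stores the tweet's position in that conversation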

In [17]:
def create_tables() -> str:
    """Create the `conversations` and `part_of` tables if they do not exist yet.

    `conversations` gets the columns id, Tstart, Tend, Airline (VARCHAR, default '0')
    and Length. `part_of` links a tweet (tID) to a conversation (cID) together with the
    tweet's Position, with foreign keys to `tweets` and `conversations`.

    The database name is read from the 'DB_DATABASE' config entry. Because of
    `IF NOT EXISTS` the cell can be re-run without raising; note that an already
    existing table is left untouched, even if its columns differ from the ones below.

    Returns:
        The string 'done'.
    """
    database = config.env.getConfig('DB_DATABASE')

    cursor.execute(f"""CREATE TABLE IF NOT EXISTS `{database}`.`conversations`(id MEDIUMINT NOT NULL, Tstart BIGINT NOT NULL, Tend BIGINT NOT NULL,
    Airline VARCHAR(50) NOT NULL DEFAULT '0',Length SMALLINT NOT NULL, PRIMARY KEY (id))""")

    # part_of is created second because its foreign key references `conversations`.
    cursor.execute(f"""CREATE TABLE IF NOT EXISTS `{database}`.`part_of`(tID BIGINT NOT NULL, cID MEDIUMINT NOT NULL, Position SMALLINT NOT NULL,
    PRIMARY KEY(tID, cID),FOREIGN KEY (tID) REFERENCES `tweets`(id), FOREIGN KEY (cID) REFERENCES `conversations`(id))""")
    return 'done'

def drop_tables() -> str:
    """Drop the `part_of` and `conversations` tables if they exist.

    `part_of` is dropped first because it holds a foreign key to `conversations`.
    `IF EXISTS` keeps the call from raising when a table is already gone, so the
    function can be used to reset the notebook state regardless of what exists.

    Returns:
        The string 'dropped'.
    """
    cursor.execute("DROP TABLE IF EXISTS `part_of`")
    cursor.execute("DROP TABLE IF EXISTS `conversations`")
    db.commit()
    return 'dropped'

def clear_tables() -> str:
    """Remove every row from `part_of` and then from `conversations`.

    Each delete is committed on its own, in that order, and the tables
    themselves are kept.

    Returns:
        The string 'cleared'.
    """
    delete_statements = ("DELETE FROM `part_of`", "DELETE FROM`conversations`")
    for statement in delete_statements:
        cursor.execute(statement)
        db.commit()
    return 'cleared'

In [18]:
create_tables() # Create the extra tables (`conversations` and `part_of`) that store the information about our conversations.

'done'

In [19]:
def _add_airline(participants: str, name: str) -> str:
    """Return the `Airline` column value after airline `name` has joined the conversation."""
    if participants == '0':  # '0' is what a conversation holds while no airline has participated
        return f"[{name}]"
    # Compare whole names: a substring test would treat 'AirBerlin' as already
    # present in '[AirBerlin assist]'.
    listed = participants.strip('[]').split(',')
    if name in listed:
        return participants
    return f"[{','.join(listed + [name])}]"


def fill_tables(tweets: list) -> None:
    """Fill the `conversations` and `part_of` tables from the rows of the tweets table.

    Args:
        tweets: rows of the `tweets` table as returned by the cursor. Only the
            positions 0 (id), 2 (in_reply_to_status_id), 4 (timestamp_ms) and
            8 (user_id) are read. The rows must be ordered newest first (as the
            `ORDER BY timestamp_ms DESC` query in this notebook does), because a
            reply has to be processed before the tweet it answers.

    Returns:
        None. The results are written to the database through the module-level
        `cursor` and committed through `db`.

    Every tweet that has no known reply starts a new conversation (ids count up
    from 1) at Position 1; every tweet that was replied to joins the
    conversation(s) of its replies at the next position. `Tstart` is moved back
    to the timestamp of each tweet that joins and `Airline` collects the names
    of the participating airlines as '[name1,name2]'.
    """
    # tweet id -> conversation ids that tweet has to join once we reach it,
    # because a reply to it is already part of those conversations.
    member_of = {}

    # Incremented for every new conversation so that ids are never reused.
    conv_id = 1

    n = len(tweets)
    # Commit roughly ten times during the run. max(1, ...) avoids a modulo by
    # zero: round(n / 10) is 0 for fewer than 6 tweets.
    commit_every = max(1, round(n / 10))

    for run, t in enumerate(tweets):
        tweet = t[0]     # id
        reply_id = t[2]  # in_reply_to_status_id
        tstamp = t[4]    # timestamp_ms
        user_id = t[8]   # user_id

        if tweet in member_of:  # replies to this tweet have already been processed
            convs = member_of.pop(tweet)
            for conv in convs:
                cursor.execute(f"SELECT Airline, Length FROM `conversations` WHERE id={conv}")
                conv_info = cursor.fetchall()

                conv_airline = conv_info[0][0]  # airlines participating so far ('0' if none)
                length = 1 + conv_info[0][1]    # conversation length including this tweet

                # Record which conversation the tweet belongs to and where in it.
                cursor.execute(f"INSERT INTO `part_of`(tID,cID, Position) VALUES ({tweet},{conv},{length})")

                if user_id in airlines:
                    new_airline = _add_airline(conv_airline, airlines[user_id])
                else:
                    new_airline = conv_airline

                if new_airline != conv_airline:  # a new airline joined: store the extended list
                    cursor.execute(f"""UPDATE `conversations` SET Tstart={tstamp}, Airline = '{new_airline}',
                    length = {length} WHERE id={conv}""")
                else:
                    cursor.execute(f"UPDATE `conversations` SET Tstart={tstamp}, length = {length} WHERE id={conv}")
        else:  # no reply to this tweet was seen: it is the last tweet of a new conversation
            convs = [conv_id]  # used below when registering the tweet this one replies to

            if user_id in airlines:
                cursor.execute(f"""INSERT INTO `conversations`(id,Tstart, Tend, Airline, length)
                VALUES ({conv_id}, {tstamp}, {tstamp}, '[{airlines[user_id]}]', 1)""")
            else:
                cursor.execute(f"""INSERT INTO `conversations`(id,Tstart, Tend, length)
                VALUES ({conv_id}, {tstamp}, {tstamp}, 1)""")
            cursor.execute(f"INSERT INTO `part_of`(tID,cID,Position) VALUES ({tweet},{conv_id},1)")
            conv_id += 1

        # 0 marks "not a reply"; None is skipped as well in case the column holds NULL.
        if reply_id not in (0, None):
            # The replied-to tweet must later join all conversations of this tweet,
            # in addition to those of any other reply already registered for it.
            member_of.setdefault(reply_id, []).extend(convs)

        if run % commit_every == 0:
            db.commit()  # commit periodically instead of in one huge transaction

    db.commit()  # persist the rows written since the last periodic commit
    return None

In [20]:
# Build the conversations for all tweets. fill_tables returns None, so FT_tot only holds None;
# the actual result is written to the `conversations` and `part_of` tables.
FT_tot = fill_tables(total)

In [21]:
# Read back every row of the conversations table and keep them in `conversations`.
cursor.execute(
    "SELECT * FROM `conversations`"
)
conversations = cursor.fetchall()

In [22]:
def drop_singles(conversations: list) -> None:
    """Delete every conversation that consists of a single tweet.

    Args:
        conversations: rows of the `conversations` table as returned by the
            cursor; position 0 is read as the conversation id and position 4
            as its length.

    Returns:
        None. For each row with length 1 the matching `part_of` rows and the
        `conversations` row are deleted; all deletions are committed together
        once at the end instead of once per inspected row.
    """
    single_ids = [row[0] for row in conversations if row[4] == 1]

    for conv_id in single_ids:
        # part_of first: it references the conversation through a foreign key.
        cursor.execute(f"DELETE FROM part_of WHERE cID = {conv_id}")
        cursor.execute(f"DELETE FROM conversations WHERE id = {conv_id}")

    db.commit()  # one commit for the whole batch
    return None


In [23]:
# Remove the conversations of length 1 found in the `conversations` rows fetched above.
drop_singles(conversations)