In [None]:
# Authenticating to Google: mount Google Drive at /content/drive
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import asyncio
import logging
import os
import re
import time
from concurrent.futures import ProcessPoolExecutor
from getpass import getpass

import google.generativeai as genai
import numpy as np
import pandas as pd
from google.generativeai.types import HarmCategory, HarmBlockThreshold
##########################################

In [None]:
# Library Installation
# NOTE(review): the import cell above already imports google.generativeai, so on
# a fresh kernel this install presumably has to run before that cell -- confirm.
!pip install -q -U google-generativeai
##########################################

In [None]:
# Load the active-node tweet dataset from the mounted Drive folder
df = pd.read_csv("/content/drive/MyDrive/Y2S2_CSS/CSS Project/Project Submission Folder/Datasets/socialfrustration_activenodes.csv")

# Preview the first rows
df.head()

Unnamed: 0.1,Unnamed: 0,source,follower,text,tweet_type,sentiment,sign
0,22,pink_lady56,mshaw53,RT @pink_lady56: #debatenight #CrookedHillary...,retweet,Negative,1
1,23,Bdog99993,mshaw53,RT @Bdog99993: Hillary's ride to the debate to...,retweet,Negative,1
2,44,TwitchyTeam,Bulldog_tracks,RT @TwitchyTeam: Donât forget the cough drop...,retweet,Negative,1
3,50,HillaryClinton,Bulldog_tracks,@HillaryClinton a reminder-dont cough #Crooked...,reply,Negative,-1
4,52,pink_lady56,Z1Nomad,RT @pink_lady56: #debatenight #CrookedHillary...,retweet,Negative,1


In [None]:
# Keep only the first row for each distinct tweet text
extracted = df.drop_duplicates(subset='text', keep='first')


In [None]:
# Preview the de-duplicated tweets
extracted.head()

Unnamed: 0.1,Unnamed: 0,source,follower,text,tweet_type,sentiment,sign
0,22,pink_lady56,mshaw53,RT @pink_lady56: #debatenight #CrookedHillary...,retweet,Negative,1
1,23,Bdog99993,mshaw53,RT @Bdog99993: Hillary's ride to the debate to...,retweet,Negative,1
2,44,TwitchyTeam,Bulldog_tracks,RT @TwitchyTeam: Donât forget the cough drop...,retweet,Negative,1
3,50,HillaryClinton,Bulldog_tracks,@HillaryClinton a reminder-dont cough #Crooked...,reply,Negative,-1
6,57,HillaryClinton,lvforartttt2060,"@HillaryClinton Stephen Coughlin Aug 21 LA ""Ca...",reply,Negative,-1


In [None]:
 # Key Authentication
# The API key is read from the GOOGLE_API_KEY environment variable, falling back
# to an interactive (non-echoing) prompt, so the secret never lives in the
# notebook source.
# NOTE(review): a key was previously hardcoded on this line; treat it as
# compromised and revoke/rotate it.
genai.configure(
    api_key=os.environ.get("GOOGLE_API_KEY") or getpass("Enter your Gemini API key: ")
)

# Defining model to use
model = genai.GenerativeModel('gemini-pro')

# Per-category blocking thresholds passed with every generate_content call;
# every listed category is set to BLOCK_NONE.
safety_setting = {
    "HARM_CATEGORY_HARASSMENT": "BLOCK_NONE",
    "HARM_CATEGORY_HATE_SPEECH": "BLOCK_NONE",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT": "BLOCK_NONE",
    "HARM_CATEGORY_DANGEROUS_CONTENT": "BLOCK_NONE",
}


# Prompt prefix; the tweet text is appended directly after it.
emotion_prompt = "Based on this tweet, identify the dominant emotion expressed? Without any other words: "

def Emotion_Check(tweet, max_retries=20):
  """Ask the model for the dominant emotion expressed in ``tweet``.

  Any exception raised while building the prompt, calling the model or reading
  ``response.text`` triggers a retry after a 0.5 s pause.  The number of
  attempts is bounded so that a tweet which can never be answered does not
  hang the whole run.

  Args:
    tweet: Tweet text that is appended to ``emotion_prompt``.
    max_retries: Maximum number of attempts before giving up.

  Returns:
    The model's reply as a string, or an empty string if every attempt failed.
  """
  time.sleep(0.2)  # short pause before the first request
  for attempt in range(1, max_retries + 1):
    try:
      response = model.generate_content(emotion_prompt + tweet,
                                        safety_settings=safety_setting)
      answer = str(response.text)
    except Exception as err:  # narrower than a bare except: Ctrl-C/SystemExit still propagate
      logging.debug("Emotion_Check attempt %d/%d failed: %s", attempt, max_retries, err)
      time.sleep(0.5)
    else:
      time.sleep(0.5)  # pause after a successful request as well
      return answer
  logging.warning("Emotion_Check gave up after %d attempts for tweet: %r", max_retries, tweet)
  return ""

async def process_emotion(tweet):
  """Run ``Emotion_Check`` for ``tweet`` on the event loop's default executor.

  Returns:
    A two-item list ``[tweet, [answer]]``; the answer is wrapped in a
    one-element list.
  """
  pending = asyncio.get_event_loop().run_in_executor(None, Emotion_Check, tweet)
  answers = [await pending]
  return [tweet, answers]

##########################################################################
# Prompt prefix; the tweet text is appended directly after it.
clarity_prompt = "Based on this tweet, is the main idea clearly communicated? Select one option: yes, no, or I don't know. Without any other words. "

def Clarity_Check(tweet, max_retries=20):
  """Ask the model whether the main idea of ``tweet`` is clearly communicated.

  Any exception raised while building the prompt, calling the model or reading
  ``response.text`` triggers a retry after a 0.5 s pause.  The number of
  attempts is bounded so that a tweet which can never be answered does not
  hang the whole run.

  Args:
    tweet: Tweet text that is appended to ``clarity_prompt``.
    max_retries: Maximum number of attempts before giving up.

  Returns:
    The model's reply as a string, or an empty string if every attempt failed.
  """
  time.sleep(0.2)  # short pause before the first request
  for attempt in range(1, max_retries + 1):
    try:
      response = model.generate_content(clarity_prompt + tweet,
                                        safety_settings=safety_setting)
      answer = str(response.text)
    except Exception as err:  # narrower than a bare except: Ctrl-C/SystemExit still propagate
      logging.debug("Clarity_Check attempt %d/%d failed: %s", attempt, max_retries, err)
      time.sleep(0.5)
    else:
      time.sleep(0.5)  # pause after a successful request as well
      return answer
  logging.warning("Clarity_Check gave up after %d attempts for tweet: %r", max_retries, tweet)
  return ""

async def process_clarity(tweet):
  """Run ``Clarity_Check`` for ``tweet`` on the event loop's default executor.

  Returns:
    A two-item list ``[tweet, [answer]]``; the answer is wrapped in a
    one-element list.
  """
  pending = asyncio.get_event_loop().run_in_executor(None, Clarity_Check, tweet)
  answers = [await pending]
  return [tweet, answers]

##########################################################################
# Prompt prefix; the tweet text is appended directly after it.
ambiguity_prompt = "Identify any phrases (longer than 1 word) that are vague, imprecise, or subject to interpretation in the following tweet. Return your response as a single string separated by commas: "
#ambiguity_prompt = "Identify any ambiguous language or phrases in the following tweet, and return your response as a single string separated by comma: "

def Ambiguity_Check(tweet, max_retries=20):
  """Ask the model for the vague or ambiguous phrases contained in ``tweet``.

  Any exception raised while building the prompt, calling the model or reading
  ``response.text`` triggers a retry after a 0.5 s pause.  The number of
  attempts is bounded so that a tweet which can never be answered does not
  hang the whole run.

  Args:
    tweet: Tweet text that is appended to ``ambiguity_prompt``.
    max_retries: Maximum number of attempts before giving up.

  Returns:
    The model's reply as a string, or an empty string if every attempt failed.
  """
  time.sleep(0.2)  # short pause before the first request
  for attempt in range(1, max_retries + 1):
    try:
      response = model.generate_content(ambiguity_prompt + tweet,
                                        safety_settings=safety_setting)
      answer = str(response.text)
    except Exception as err:  # narrower than a bare except: Ctrl-C/SystemExit still propagate
      logging.debug("Ambiguity_Check attempt %d/%d failed: %s", attempt, max_retries, err)
      time.sleep(0.5)
    else:
      time.sleep(0.5)  # pause after a successful request as well
      return answer
  logging.warning("Ambiguity_Check gave up after %d attempts for tweet: %r", max_retries, tweet)
  return ""

async def process_ambiguity(tweet):
  """Run ``Ambiguity_Check`` for ``tweet`` on the event loop's default executor.

  Returns:
    A two-item list ``[tweet, [answer]]``; the answer is wrapped in a
    one-element list.
  """
  pending = asyncio.get_event_loop().run_in_executor(None, Ambiguity_Check, tweet)
  answers = [await pending]
  return [tweet, answers]


# Split the tweet texts into consecutive chunks of at most `batch_size` rows
batch_size = 10
tweet_batches = [
    extracted['text'].iloc[start:start + batch_size]  # positional slice of the text column
    for start in range(0, len(extracted), batch_size)
]

In [None]:
async def main():
  """Run the emotion, clarity and ambiguity checks over every batch of tweets.

  For each batch in the module-level ``tweet_batches`` the three checks are
  started for every tweet and awaited together; the next batch starts only
  after the current one has finished.  No process pool is created here: the
  ``process_*`` coroutines submit their work with ``run_in_executor(None, ...)``,
  i.e. to the event loop's default executor.

  Returns:
    A DataFrame with the columns ``text``, ``emotion``, ``clarity_check`` and
    ``ambiguous_phrases``, one row per processed tweet.
  """
  logging.getLogger('tornado.access').disabled = True

  final_results = []

  for batch in tweet_batches:
    # One inner gather per check type, all awaited concurrently.
    emotion_results, clarity_results, ambiguity_results = await asyncio.gather(
        asyncio.gather(*(process_emotion(tweet) for tweet in batch)),
        asyncio.gather(*(process_clarity(tweet) for tweet in batch)),
        asyncio.gather(*(process_ambiguity(tweet) for tweet in batch)),
    )

    # Each result is [tweet, [answer]]; the three lists share the batch's order.
    for emotion_result, clarity_result, ambiguity_result in zip(
        emotion_results, clarity_results, ambiguity_results):
      tweet_text = emotion_result[0]
      if tweet_text is not None:
        final_results.append((tweet_text,
                              emotion_result[1][0].lower(),
                              clarity_result[1][0].lower(),
                              ambiguity_result[1][0]))

    print("Batch completed")
    # Non-blocking pause between batches (time.sleep would stall the event loop).
    await asyncio.sleep(0.5)

  return pd.DataFrame(final_results,
                      columns=['text', 'emotion', 'clarity_check', 'ambiguous_phrases'])


# Run the pipeline: main() is awaited directly at the top level of the cell
# (no asyncio.run), and the returned DataFrame is kept as results_df.
results_df = await main()

Batch completed
Batch completed
Batch completed
Batch completed
Batch completed
Batch completed
Batch completed
Batch completed
Batch completed
Batch completed


In [None]:
# Persist the results. index=False keeps the row index out of the file, so
# re-reading the CSV does not add "Unnamed: 0"-style columns.
results_df.to_csv("/content/drive/MyDrive/Y2S2_CSS/CSS Project/Project Submission Folder/Datasets/misinfo_activenodes.csv", index=False)

In [None]:
# Display the results frame
results_df

Unnamed: 0.1,Unnamed: 0,text,emotion,clarity_check,ambiguous_phrases
0,0,RT @pink_lady56: #debatenight #CrookedHillary...,disgust,i don't know.,Body Double
1,1,RT @Bdog99993: Hillary's ride to the debate to...,sarcasm,yes,"Hillary's ride to the debate tonight, just in ..."
2,2,RT @TwitchyTeam: Donât forget the cough drop...,concern,yes,#DebateNight
3,3,@HillaryClinton a reminder-dont cough #Crooked...,anger,yes,a reminder-dont cough
4,4,RT @pink_lady56: #debatenight #CrookedHillary...,anger,i don't know,"#debatenight, #CrookedHillary's Check List, In..."
...,...,...,...,...,...
128,128,"RT @JoshNoneYaBiz: Sorry Hillary, you dont win...",sarcasm,yes,without having a seizure
129,129,RT @EricWolfson: #HillaryCough? Try #DonaldSn...,sarcasm,i don't know,"Try #DonaldSniff, How much coke did he do back..."
130,130,I seriously think Hillary you were having an u...,contempt,no,"seriously think, uncontrollable Little seizure"
131,131,You are having a seizure you are unfit to be t...,anger,no,"having a seizure, during that little seizure"


In [None]:
# (rows, columns) of the results frame
results_df.shape

(133, 5)

In [None]:
# Extracting misinformation tweets: keep rows whose 'emotion' is one of the
# selected labels, then drop repeated tweet texts.
# NOTE(review): 'degredation' looks like a misspelling of 'degradation'; isin()
# is an exact match, so confirm which spelling the model actually returns.
select_emotions = ('contempt', 'anger',  'sarcasm', 'disgust', 'degredation', 'disdain')

# Chained (not inplace) so the result is a new frame rather than a mutated
# slice of results_df, which avoids pandas' SettingWithCopyWarning.
misinfo_tweets_df = (
    results_df[results_df['emotion'].isin(select_emotions)]
    .drop_duplicates(subset='text')
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  misinfo_tweets_df.drop_duplicates(subset='text', inplace=True)


In [None]:
# Display the filtered tweets
misinfo_tweets_df

Unnamed: 0.1,Unnamed: 0,text,emotion,clarity_check,ambiguous_phrases
0,0,RT @pink_lady56: #debatenight #CrookedHillary...,disgust,i don't know.,Body Double
1,1,RT @Bdog99993: Hillary's ride to the debate to...,sarcasm,yes,"Hillary's ride to the debate tonight, just in ..."
3,3,@HillaryClinton a reminder-dont cough #Crooked...,anger,yes,a reminder-dont cough
6,6,"@HillaryClinton Stephen Coughlin Aug 21 LA ""Ca...",anger,i don't know.,Can Islam Coexist with Civilization
15,15,@HillaryClinton Will you be coughing or passin...,anger,no,"coughing or passing out, tonight"
...,...,...,...,...,...
126,126,RT @EricWolfson: #DebatesÂ Expectations: Hill...,sarcasm,no,Don't Lie!
127,127,RT @EricWolfson: #HillaryCough? Try #DonaldSn...,anger,yes,How much coke did he do backstage
128,128,"RT @JoshNoneYaBiz: Sorry Hillary, you dont win...",sarcasm,yes,without having a seizure
130,130,I seriously think Hillary you were having an u...,contempt,no,"seriously think, uncontrollable Little seizure"


In [None]:
def extract_match(query, text):
  """Return ``text`` when the regex ``query`` matches somewhere in it.

  The search is case-insensitive.  Note that the whole input string is
  returned, not just the matched portion.

  Args:
    query: Regular-expression pattern to search for.
    text: String to search.  Anything that is not ``str``/``bytes`` (for
      example ``None`` or a NaN from a DataFrame) is treated as "no match"
      instead of raising ``TypeError``.

  Returns:
    ``text`` itself if the pattern is found, otherwise ``None``.
  """
  if not isinstance(text, (str, bytes)):
    return None
  if re.search(query, text, flags=re.IGNORECASE):
    return text
  return None

#Set phrase/keyword to look for in df['text']
query = ".*parkinson.*"

# Case-insensitive regex filter on the tweet text (missing values count as "no
# match"), followed by de-duplication.  Chained instead of inplace so the
# result is a new frame and pandas does not raise SettingWithCopyWarning.
# NOTE(review): this rebinds `extracted`, which an earlier cell also assigns
# (the de-duplicated input tweets) -- confirm the reuse is intentional.
extracted = (
    results_df[results_df['text'].str.contains(query, case=False, regex=True, na=False)]
    .drop_duplicates(subset='text')
)
extracted.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  extracted.drop_duplicates(subset='text', inplace=True)


Unnamed: 0,text,emotion,clarity_check,ambiguous_phrases
43,@HillaryClinton Forgot to mention the possibil...,contempt,yes,"forgot to mention, possibility, seizure, pass ..."
45,@HillaryClinton Or you asleep due to all the d...,anger,no,Or you asleep due to all the drugs you are tak...
101,@HillaryClinton Do You Have Parkinsons Disease...,anger,yes,"Parkinsons Disease, Why Did You Delete Hammer ..."
103,@HillaryClinton @PolitiFact Poor Judgment Hill...,anger,no,"Poor Judgment, Voted For Iraq War, Did Not"
112,RT @GoldStarMomTX55: What is she on? #hillaryp...,sarcasm,yes,"#Sarcasm on steroids, So programmed, So drugge..."


In [None]:
# Show the text of every selected tweet as a plain Python list
list(misinfo_tweets_df['text'])


['@HillaryClinton a reminder-dont cough #CrookedHillary',
 "RT @pink_lady56: #debatenight  #CrookedHillary's Check List  Cough Drops â\x9c\x94  Ear Piece   â\x9c\x94    Stool â\x9c\x94  Pant Suit  â\x9c\x94  Body Double â\x9c\x94  Indoor Vâ\x80¦",
 '@HillaryClinton @TheBriefing2016 1 cough, you lose. Hillary.',
 'RT @DeeconX: #SickHillary just arrived for #debatenight take a shot every time she coughs or collapses.  #PresidentialDebate @Cernovich @Caâ\x80¦',
 "RT @pink_lady56: #debatenight  #CrookedHillary's Check List  Cough Drops â\x9c\x94  Ear Piece   â\x9c\x94    Stool â\x9c\x94  Pant Suit  â\x9c\x94  Body Double â\x9c\x94  Indoor Vâ\x80¦",
 '@HillaryClinton @TheBriefing2016 Who thinks Hillary will:                            Lie? Cough? Fall? Have a Seizure? Lie?',
 "@HillaryClinton don't cough at debate https://t.co/lUp6HbW2NH",
 "@HillaryClinton don't cough. https://t.co/J8J8tIP1RX",
 '@HillaryClinton Do You Have Parkinsons Disease? Why Did You Delete Hammer Destroy 30K Emails. 