In [1]:
import os
import re
import json
import math
import time
import demoji
import curlify
import requests
import pandas as pd

import pickle
from scipy.special import softmax

import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')

from tqdm.notebook import tqdm

all_stopwords = stopwords.words('english')
pd.set_option('display.max_colwidth', None)

bearer_token = "AAAAAAAAAAAAAAAAAAAAADSXjAEAAAAAQ32sCpTWcilVX%2BQj0BjOArOSYCE%3DUax6ZCizLRwwQcWQirExGQEDEB903dxBZKM4LAuGTMTvMa4jVP"

from datetime import date

today = date.today()

[nltk_data] Downloading package stopwords to
[nltk_data]     /home/crarojasca/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
SCRAP_CTTS = True
SCRAP_DENIALS = True

## Functions

In [3]:
s = requests.Session()
def getTweets(query, max_results=None, next_token=None):
    # Twitter Endpoint
    url = "https://api.twitter.com/2/tweets/search/recent"
    
    # Auth Bearer no research access or premium for the moment
    headers = {
        "Authorization": "Bearer {}".format(bearer_token)
    }
    
    # Query parameters
    params = {
        'query': query,
    #     'start_time': start_date,
    #     'end_time': end_date,
        'expansions': 'author_id,in_reply_to_user_id,geo.place_id',
        'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
        'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
        'place.fields': 'full_name,id,country,country_code,geo,name,place_type',
        'next_token': {}
    }
    if max_results: params['max_results'] = max_results
    if next_token: params['next_token'] = next_token

    try:
        time.sleep(6)
        req = requests.Request(
            "GET",
            url,
            params=params,
            headers=headers
        ).prepare()

        response = s.send(req)
        results = json.loads(response.text)
        return results
    except:
        print(json.loads(response.text))

def getUser(username):
    # User Endpoint 
    url = "https://api.twitter.com/2/users/by/username/" + username
    # Auth Bearer no research access or premium for the moment
    headers = {
        "Authorization": "Bearer {}".format(bearer_token)
    }

    params = {
        'tweet.fields': 'id,text,author_id,in_reply_to_user_id,geo,conversation_id,created_at,lang,public_metrics,referenced_tweets,reply_settings,source',
        'user.fields': 'id,name,username,created_at,description,public_metrics,verified',
        'next_token': {}
    }
    
    time.sleep(1)
    try:
        req = requests.Request(
            "GET",
            url,
            params=params,
            headers=headers
        ).prepare()

        response = s.send(req)
        results = json.loads(response.text)["data"]

        return json.loads(response.text)["data"]
    except:
        print(json.loads(response.text)["errors"][0]["detail"])


def count_tweets(query):
    # User Endpoint 
    url = "https://api.twitter.com/2/tweets/counts/recent"
    # Auth Bearer no research access or premium for the moment
    headers = {
        "Authorization": "Bearer {}".format(bearer_token)
    }

    params = {
        "query": query,
        "granularity": "day"
    }

    time.sleep(1)
    req = requests.Request(
        "GET",
        url,
        params=params,
        headers=headers
    ).prepare()

    response = s.send(req)
    results = json.loads(response.text)
    return results["meta"]["total_tweet_count"]
    
    
def extract_data(query, max_result=100):
    data = pd.DataFrame()

    # Scrap all the data up to the last page
    next_token = None

    n_tweets = count_tweets(query)
    if n_tweets==0:
        return data
    
    #
    pbar = tqdm(total=math.ceil(n_tweets/max_result))
    while True:
        pbar.update(1)
        try:
            results = getTweets(query=query, max_results=max_result, next_token=next_token)
            data = pd.concat([data, pd.DataFrame(results["data"])], ignore_index=True)
            if not "next_token" in results["meta"]:
                break
            next_token = results["meta"]["next_token"] 
        except:
            print(results)
            break
    pbar.close()
    
    # Reordering de columns
    columns = list(data.columns)
    columns.remove("text")
    columns.remove("public_metrics")
    data = data[["text", "public_metrics"] + columns]
    return data

def preprocess_text(text):
    # Lower
    preprocessed_text = text.lower()
    # Remove Handle
    preprocessed_text = re.sub("@\w+", "", preprocessed_text)
    # Remove Hashtag
    preprocessed_text = re.sub("#\w+", "", preprocessed_text)
    # Remove Links
    preprocessed_text = re.sub(r'http[s]?:\S+', '', preprocessed_text, flags=re.MULTILINE)
    # Remove emotes 
    preprocessed_text = demoji.replace(preprocessed_text, "")
    # Remove new line
    preprocessed_text = re.sub("\\n", "", preprocessed_text)
    # Remove extra spaces 
    preprocessed_text = preprocessed_text.strip()
    
    return preprocessed_text

def get_data(query):
    results = extract_data(query)
    results["preprocessed_text"] = results["text"].apply(preprocess_text)
    results = results.sort_values(by="created_at", ascending=False)
    results = results.drop_duplicates(subset=['text'], keep='last')
    results = results[(results.preprocessed_text!="")].copy(deep=True)
    return results

## ClimateScam
### Original Page

In [4]:
results = get_data("from:ClimateScam")
print(results.shape)
results[["preprocessed_text", "text", "public_metrics"]]

  0%|          | 0/1 [00:00<?, ?it/s]

(5, 12)


Unnamed: 0,preprocessed_text,text,public_metrics
0,"global suv sales increased despite obstacles &amp; rising inflation, growing by 3% between 2021 &amp; 2022. in 2022, suvs = 46% of global car sales. most growth came in the u.s., india and europe. evs = 16% of total suv sales in '22. greenies: don't tell us what to drive!","Global SUV sales increased despite obstacles &amp; rising inflation, growing by 3% between 2021 &amp; 2022. In 2022, SUVs = 46% of global car sales. Most growth came in the U.S., India and Europe. EVs = 16% of total SUV sales in '22. Greenies: don't tell us what to drive! #climatescam","{'retweet_count': 6, 'reply_count': 1, 'like_count': 37, 'quote_count': 0, 'impression_count': 846}"
1,"while oxy wants ""co2 vacuum cleaners"", a canadian company will dump 300 tons of magnesium hydroxide (""milk of magnesia"") in its mineral form, brucite, in st ives bay to slow climate change. this alkaline will shift the sea's ph value. 3 eyed fish, anyone? stupid!","While OXY wants ""CO2 vacuum cleaners"", a Canadian company will dump 300 tons of magnesium hydroxide (""Milk of magnesia"") in its mineral form, brucite, in St Ives Bay to slow climate change. This alkaline will shift the sea's pH value. 3 eyed fish, anyone? STUPID! #CLIMATESCAM","{'retweet_count': 18, 'reply_count': 6, 'like_count': 42, 'quote_count': 1, 'impression_count': 1219}"
2,and replace it with renewables? seriously? you clearly don't care about saving the earth. you are part of the problem.,@MikeHudema @NASAClimate And replace it with renewables? Seriously? You clearly don't care about saving the Earth. You are part of the problem.,"{'retweet_count': 4, 'reply_count': 3, 'like_count': 55, 'quote_count': 0, 'impression_count': 489}"
3,"occidental (oxy) plans for vacuum machines to suck co2 from the atmosphere. this tech [""direct-air capture (dac)""] uses ""carbon removal"" to limit global warming. 100 facilities to be constructed by 2035. upfront construction costs = $110 bil. watch for the ipo.","Occidental (OXY) plans for vacuum machines to suck CO2 from the atmosphere. This tech [""direct-air capture (DAC)""] uses ""carbon removal"" to limit global warming. 100 facilities to be constructed by 2035. Upfront construction costs = $110 BIL. Watch for the IPO. #climatescam","{'retweet_count': 19, 'reply_count': 20, 'like_count': 55, 'quote_count': 3, 'impression_count': 2122}"
5,"biden announced that evs can be plugged into your home to provide electricity. here's the cost if you use a ford f-150 lightening: charging station $1,650; home integration system $9,400; new panel $7,940; circuits $840; misc: $1,600. that's $21,430. line up! scam!","Biden announced that EVs can be plugged into your home to provide electricity. Here's the cost if you use a Ford F-150 Lightening: charging station $1,650; Home Integration System $9,400; new panel $7,940; circuits $840; misc: $1,600. That's $21,430. Line Up! SCAM! #climatescam","{'retweet_count': 48, 'reply_count': 9, 'like_count': 160, 'quote_count': 1, 'impression_count': 3022}"


### Complete lines

In [5]:
climate_scam_handle = get_data("@ClimateScam OR #ClimateScam OR from:ClimateScam -is:retweet")
print(climate_scam_handle.shape)
climate_scam_handle[["preprocessed_text", "text", "public_metrics"]].head()

  0%|          | 0/360 [00:00<?, ?it/s]



(1694, 13)


Unnamed: 0,preprocessed_text,text,public_metrics
6,record low temps broken again,Record low temps broken again #ClimateScam https://t.co/lqHHaypvps,"{'retweet_count': 0, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 0}"
12,rt : we need an investigation like covid to bring awareness to the general public that is driven by tra…,RT @Blightykid: @goddeketal We need an investigation like COVID to bring awareness to the general public that #ClimateScam is driven by tra…,"{'retweet_count': 1, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 0}"
13,we need an investigation like covid to bring awareness to the general public that is driven by trans global corporations wef un who and elites to transfer billions into their pockets as the west plummetes into poverty b/c it’s not about the climate or environment,@goddeketal We need an investigation like COVID to bring awareness to the general public that #ClimateScam is driven by trans global corporations WEF UN WHO and elites to transfer billions into their pockets as the West plummetes into poverty b/c it’s not about the climate or environment,"{'retweet_count': 1, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 5}"
22,"peter, if we were in a genuine ""climate emergency"", it wouldn't be necessary for you to announce it. did you happen to call emergency services about this alleged emergency? no? hmm.","@SenatorSurfer Peter, if we were in a genuine ""climate emergency"", it wouldn't be necessary for you to announce it. Did you happen to call emergency services about this alleged emergency? No? Hmm. #ClimateScam","{'retweet_count': 0, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 2}"
23,"there is no ""climate crisis"". there is no ""public health"" threat from stoves. gas stoves represent individual agency, and are difficult to administrate centrally, so they're being attacked.","@KQED There is no ""climate crisis"". There is no ""public health"" threat from stoves. Gas stoves represent individual agency, and are difficult to administrate centrally, so they're being attacked. #ClimateScam","{'retweet_count': 0, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 0}"


In [6]:
climate_scam_hashtag = get_data("#ClimateScam -is:retweet")
print(climate_scam_hashtag.shape)

  0%|          | 0/70 [00:00<?, ?it/s]

(5347, 13)


In [7]:
climate_scam_hashtag[["preprocessed_text", "text", "public_metrics"]].sample(5)

Unnamed: 0,preprocessed_text,text,public_metrics
6721,"there's no ""climate crisis"", so you could cross that one off your list.","@Xavier_Bettel @EIB @wernerhoyer There's no ""climate crisis"", so you could cross that one off your list. #ClimateScam","{'retweet_count': 1, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 50}"
3393,"replying to it won't be dry for long...but know how california operates, there's most likely no plan for how they can contain it for later use and prevent mud slides.","Replying to @Don_Penim \nIt won't be dry for long...\nBut know how California operates, there's most likely no plan for how they can contain it for later use and prevent mud slides. #ClimateScam https://t.co/xgu6iw1LEq","{'retweet_count': 1, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 45}"
3945,which will do zero because the green dumb dream is impossible!!,@TheDemocrats Which will do ZERO because the green dumb dream is IMPOSSIBLE!! #ClimateHoax #ClimateScam https://t.co/JJVbhdtqjq,"{'retweet_count': 1, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 20}"
4789,&amp; : ‘s predictions are all bs; blowing up nordstrom proves biden knows the is a scam &amp; what’s up with ‘s ban of petro as china builds 100s of coal plants &amp; brics dumps petro ? treason ?,@tomselliott @JohnKerry @joebiden &amp; @JohnKerry #ClimateScam : @JohnKerry ‘s #ClimateCrisis predictions are all BS; Blowing up Nordstrom proves Biden knows the #ClimateCrisis is a scam &amp; what’s up with @JoeBiden ‘s ban of 🇺🇸Petro as China builds 100s of COAL plants &amp; BRICS dumps 🇺🇸Petro 💸 ? Treason ? https://t.co/maiibmX1qF,"{'retweet_count': 1, 'reply_count': 0, 'like_count': 0, 'quote_count': 0, 'impression_count': 6}"
965,you really need some new material.,@MikeHudema #ClimateScam\n\nYou really need some new material.,"{'retweet_count': 2, 'reply_count': 0, 'like_count': 1, 'quote_count': 0, 'impression_count': 22}"


In [8]:
def count_words(col):    
    df = col.str.split(expand=True).stack().value_counts().reset_index()
    df.columns = ['Word', 'Frequency'] 
    df = df[~df["Word"].str.lower().isin(all_stopwords)]
    return df

count_words(climate_scam_hashtag.text)[:30]

Unnamed: 0,Word,Frequency
1,#ClimateScam,4526
12,climate,778
17,#climatescam,536
28,&amp;,377
29,like,376
36,islands,334
37,Climate,333
39,#ClimateCrisis,327
40,would,319
43,#ClimateEmergency,272


## CTTs
### Extracting
#### Users

In [10]:
ctts = pd.read_csv("../CTTs/ctt_twitter_handles.csv")
ctts

Unnamed: 0,screen_name,Followers,Type,Name
0,capitalresearch,5023,CTT,Capital Research Center
1,ceidotorg,12565,CTT,Competitive Enterprise Institute
2,NationalCenter,7807,CTT,National Center for Public Policy Research
3,ReasonFdn,7030,CTT,Reason Foundation
4,ACSHorg,5734,CTT,American Council on Science and Health
5,AEI,151109,CTT,American Enterprise Institute
6,CatoInstitute,362920,CTT,CATO Institute
7,CFACT,13985,CTT,CFACT
8,FoF_Liberty,236,CTT,Frontiers of Freedom
9,FraserInstitute,31658,CTT,Fraser Institute


In [None]:
for i in range(ctts.shape[0]):
    username = ctts.loc[i].screen_name
    user_meta = getUser(username)
    ctts.loc[i, 'followers_count'] = user_meta['public_metrics']['followers_count']
    ctts.loc[i, 'following_count'] = user_meta['public_metrics']['following_count']
    ctts.loc[i, 'tweet_count'] = user_meta['public_metrics']['tweet_count']
    ctts.loc[i, 'listed_count'] = user_meta['public_metrics']['listed_count']
    ctts.loc[i, 'description'] = user_meta['description']
    ctts.loc[i, 'id'] = user_meta['id']
    ctts.loc[i, 'verified'] = user_meta['verified']
    ctts.loc[i, 'created_at'] = user_meta['created_at']
ctts

In [None]:
## Total Tweets
ctts.tweet_count.sum()

#### Tweets

In [None]:
handles_process = ctts.screen_name.unique()
    
total_sum = 0
for username in tqdm(handles_process):
    number = count_tweets("@{} -is:retweet".format(username))
    total_sum+=number
print("Number of tweets: {}".format(total_sum))
print("Aproximate time to process: {} minutes".format(total_sum*6/60/100))

In [None]:
if SCRAP_CTTS:
    file = "Twitter_scrapped/tweets_scrapped-CTTs_{}.csv".format(today)
    if os.path.isfile(file):
        tweets = pd.read_csv(file)
        handles_process = list(set(ctts.screen_name.unique()) - set(tweets.username.unique()))
    else:
        tweets = pd.DataFrame()
        handles_process = ctts.screen_name.unique()

    for username in tqdm(handles_process):
        handle_tweets = extract_data("@{} -is:retweet".format(username), 100)
        handle_tweets["username"] = username
        tweets = pd.concat([tweets, handle_tweets])
        tweets.to_csv("tweets_scrapped-CTTs.csv", index=False)

### Preprocess

In [None]:
file = "Twitter_scrapped/tweets_scrapped-CTTs_{}.csv".format(today)
tweets = pd.read_csv(file)

In [None]:
tweets.shape[0]

In [None]:
tweets["preprocessed_text"] = tweets["text"].apply(preprocess_text)

In [None]:
real_tweets = tweets[tweets.referenced_tweets.isna() & (tweets.preprocessed_text!="")].copy(deep=True)
print("Number of tweets: {} from {} to {}.".format(
    real_tweets.shape[0],
    tweets.created_at.min(),
    tweets.created_at.max()
))

In [None]:
count_words(tweets.text)[:30]

## Deniers
### Extracting
#### Users

In [None]:
denials = pd.read_csv("CTTs/denier_twitter_handles.csv")
for i in range(denials.shape[0]):
    username = denials.loc[i].screen_name
    user_meta = getUser(username)
    if not user_meta:
        continue
    denials.loc[i, 'exist'] = True
    denials.loc[i, 'followers_count'] = user_meta['public_metrics']['followers_count']
    denials.loc[i, 'following_count'] = user_meta['public_metrics']['following_count']
    denials.loc[i, 'tweet_count'] = user_meta['public_metrics']['tweet_count']
    denials.loc[i, 'listed_count'] = user_meta['public_metrics']['listed_count']
    denials.loc[i, 'description'] = user_meta['description']
    denials.loc[i, 'id'] = user_meta['id']
    denials.loc[i, 'verified'] = user_meta['verified']
    denials.loc[i, 'created_at'] = user_meta['created_at']
denials

In [None]:
print("{} in total {}.".format((denials.exist==True).sum(), denials.shape[0]))

#### Tweets

In [None]:
handles_process = denials[denials.exist==True].screen_name.unique()
    
total_sum = 0
for username in tqdm(handles_process):
    number = count_tweets("from:{} -is:retweet".format(username))
    total_sum+=number
print("Number of tweets: {}".format(total_sum))
print("Aproximate time to process: {} minutes".format(total_sum*6/60/100))

In [None]:
denials_file = "Twitter_scrapped/deniers/{}.csv".format(today)
denials_to_process = denials[denials.exist==True]
if SCRAP_DENIALS:
    if os.path.isfile(denials_file):
        tweets_denials = pd.read_csv(denials_file)
        handles_process = list(
            set(denials_to_process.screen_name.unique()) - set(tweets_denials.username.unique()))
    else:
        tweets_denials = pd.DataFrame()
        handles_process = denials_to_process.screen_name.unique()

    for username in tqdm(handles_process):
        handle_tweets = extract_data("from:{} -is:retweet".format(username), 100)
        handle_tweets["username"] = username
        tweets_denials = pd.concat([tweets, handle_tweets])
        tweets_denials.to_csv(file, index=False)

In [None]:
tweets_denials.sample(5)

## Model
### #ClimateScam

In [None]:
import unicodedata

# Load and pre-process the text data
# Define text pre-processing functions
def remove_between_square_brackets(text):
    return re.sub('\[[^]]*\]', '', text)
def remove_non_ascii(text):
    """Remove non-ASCII characters from list of tokenized words"""
    return unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode('utf-8', 'ignore')
def strip_underscores(text):
    return re.sub(r'_+', ' ', text)
def remove_multiple_spaces(text):
    return re.sub(r'\s{2,}', ' ', text)

# Merge text pre-processing functions
def denoise_text(text):
    text = remove_between_square_brackets(text)
    text = remove_non_ascii(text)
    text = strip_underscores(text)
    text = remove_multiple_spaces(text)
    return text.strip()


# Pre-process the text
climate_scam_hashtag['roberta_preprocessed'] = climate_scam_hashtag["text"].astype(str).apply(denoise_text)

In [None]:
import torch
from simpletransformers.classification import ClassificationModel

device = torch.device("cpu")
# Define the model 
architecture = 'roberta'
# model_name = 'CARDS_RoBERTa_Classifier'
model_name = "cards/models/CARDS_RoBERTa_Classifier"

# Load the classifier
roberta_model = ClassificationModel(architecture, model_name, use_cuda=False)

In [None]:
predictions, raw_outputs = roberta_model.predict(list(climate_scam_hashtag.roberta_preprocessed))

In [None]:
le = pickle.load(open("le_cards.pkl", 'rb'))

In [None]:
climate_scam_hashtag['roberta_pred'] = le.inverse_transform(predictions)
climate_scam_hashtag['roberta_proba'] = [max(softmax(element[0])) for element in raw_outputs]

In [None]:
climate_scam_hashtag.roberta_pred.value_counts().to_frame().head()

In [None]:
climate_scam_hashtag.roberta_pred.hist()

In [None]:
climate_scam_hashtag[["text", "roberta_pred", "roberta_proba"]]

In [None]:
climate_scam_hashtag.loc[
    climate_scam_hashtag.roberta_pred=="0_0", ["text", "roberta_pred", "roberta_proba"]].sample(10)

In [None]:
results['roberta_preprocessed'] = results["text"].astype(str).apply(denoise_text)
predictions, raw_outputs = roberta_model.predict(list(results.roberta_preprocessed))
results['roberta_pred'] = le.inverse_transform(predictions)
results['roberta_proba'] = [max(softmax(element[0])) for element in raw_outputs]

In [None]:
results[["text", "roberta_pred", "roberta_proba"]]

In [None]:
file = "Twitter_scrapped/@ClimateScam_CARDS_predictions_{}.csv".format(today)
climate_scam_hashtag.to_csv(file)

### CTTs

In [None]:
tweets['roberta_preprocessed'] = tweets["text"].astype(str).apply(denoise_text)

In [None]:
predictions, raw_outputs = roberta_model.predict(list(tweets.roberta_preprocessed))

In [None]:
tweets['roberta_pred'] = le.inverse_transform(predictions)
tweets['roberta_proba'] = [max(softmax(element[0])) for element in raw_outputs]

In [None]:
# tweets = pd.read_csv("@Twitter_CTTs_predictions.csv")
tweets.roberta_pred.value_counts().to_frame().head()

In [None]:
tweets[tweets.referenced_tweets.isna()].shape

In [None]:
tweets.roberta_pred.hist()

In [None]:
tweets[["text", "roberta_pred", "roberta_proba"]].sample(5)

In [None]:
tweets.loc[tweets.roberta_pred!="0_0", ["text", "roberta_pred", "roberta_proba"]].sample(5)

In [None]:
file_CTT = "Twitter_scrapped/@Twitter_CTTs_predictions_{}.csv".format(today)
tweets.to_csv(file_CTT)

In [None]:
# samples = (
#     tweets.groupby("roberta_pred")
#     .sample(10, replace=True)[["text", "roberta_pred", "roberta_proba"]]
#     .drop_duplicates("text")
# )
# samples.to_csv("@Twitter_CTTs_predictions_samples.csv")

### Denials

In [None]:
tweets_denials['roberta_preprocessed'] = tweets_denials["text"].astype(str).apply(denoise_text)
predictions, raw_outputs = roberta_model.predict(list(tweets_denials.roberta_preprocessed))

In [None]:
tweets_denials['roberta_pred'] = le.inverse_transform(predictions)
tweets_denials['roberta_proba'] = [max(softmax(element[0])) for element in raw_outputs]

In [None]:
# tweets_denials = pd.read_csv("@Twitter_Denials_predictions.csv")

In [None]:
tweets_denials.roberta_pred.value_counts().to_frame().head()

In [None]:
tweets_denials.roberta_pred.hist()

In [None]:
tweets_denials[["text", "roberta_pred", "roberta_proba"]].sample(5)

In [None]:
tweets_denials.loc[tweets_denials.roberta_pred!="0_0", ["text", "roberta_pred", "roberta_proba"]].sample(5)

In [None]:
file_denials = "Twitter_scrapped/@Twitter_Denials_predictions_{}.csv".format(today)
tweets_denials.to_csv(file_denials)

In [None]:
samples = (
    tweets_denials.groupby("roberta_pred")
    .sample(10, replace=True)[["text", "roberta_pred", "roberta_proba"]]
    .drop_duplicates("text")
)
samples.to_csv("@Twitter_Denials_predictions_samples.csv")