In [None]:
"""
"Sentiment Analysis Project for Tweets using Machine Learning"

Written by Beyzagul Demir, Ersel R. Ekmen, H. Alper Karadeniz

Supervisor: Assoc. Prof. Reyyan Yeniterzi
"""

### PART 1: Scrape tweets from Twitter using Twint ###

!pip install twint # Twitter scraping tool allowing us to scrape Tweets from Twitter profiles without using Twitter's API.
!pip install --upgrade pip # Upgrade pip if needed.
!pip install nest-asyncio # Patch asyncio to allow nested event loops
__import__('IPython').embed()

!pip install --user --upgrade git+https://github.com/twintproject/twint.git@origin/master #egg=twint

!pip install deep_translator 
!pip install textblob

In [None]:
# Patch asyncio so that event loops can be nested.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop when Twint starts its own — confirm.
import nest_asyncio
nest_asyncio.apply()

In [None]:
import twint

import os
# Start from a clean slate: delete the output of any previous scraping run.
RAW_RESULTS_CSV = 'search_result.csv'
if os.path.exists(RAW_RESULTS_CSV):
    os.remove(RAW_RESULTS_CSV)

# Describe the search: which keyword to look for and the tweeting-date window.
c = twint.Config()
c.Search, c.Since, c.Until = "afgan", "2021-08-18", "2021-08-19"

# Store the scraped tweets as a CSV file on disk.
c.Store_csv = True
c.Output = RAW_RESULTS_CSV

# Run the search with the configuration above.
twint.run.Search(c)

In [None]:
import re
import string

# Filter settings applied to the scraped tweets: required language, minimum
# number of likes and minimum length of the cleaned tweet.
target_language = "tr" # The target language for this project is Turkish.
target_min_likes = 100 # Tweets with fewer likes than this are skipped.
target_min_length = 10 # Measured in words of the cleaned tweet, not in characters.

def cleanText(text):
  """Strip Twitter-specific noise from a tweet before sentiment analysis.

  Removes, in this order: @mentions, '#' signs (the hashtag word itself is
  kept), the retweet marker "RT", http(s) links and finally every character
  that is neither a word character nor whitespace.

  Args:
    text: Raw tweet text.

  Returns:
    The cleaned text. Whitespace is not normalized here, so gaps may remain
    where something was removed.
  """
  # Handles may contain underscores; without '_' in the character class,
  # "@some_user" would leave "_user" behind in the text.
  text = re.sub(r'@[A-Za-z0-9_]+', '', text)
  text = re.sub(r'#', '', text)
  # The word boundary keeps words that merely end in "RT" (e.g. "START") intact.
  text = re.sub(r'\bRT\s+', '', text)
  text = re.sub(r'https?:\/\/\S+', '', text)
  text = re.sub(r'[^\w\s]', '', text)

  return text

import csv

all_tweets_list = [] # Every scraped tweet row, each extended with its cleaned text.

# Position of the raw tweet text (the "tweet" column) within a scraped row.
TWEET_COLUMN = 10

# Read the scraped tweets and clean their text so it is ready to be analyzed.
# newline='' lets the csv module handle line endings itself, which matters
# because tweet text may contain line breaks inside a quoted field.
with open('search_result.csv', mode='r', encoding='utf-8', newline='') as infile:
    reader = csv.reader(infile)

    # Skip the header row; the default value avoids an error on an empty file.
    next(reader, None)

    for row in reader:
        if len(row) <= TWEET_COLUMN:
            continue # Blank or truncated line: there is no tweet text to clean.

        # Remove useless characters, then flatten the tweet to a single line
        # with single spaces and no leading or trailing whitespace.
        clean_tweet = " ".join(cleanText(row[TWEET_COLUMN]).split())

        # The cleaned text becomes one extra, last column of the row.
        all_tweets_list.append(row + [clean_tweet])

# Remove the result of a previous run before the file is written again.
if os.path.exists('search_result_new.csv'):
    os.remove('search_result_new.csv')

# Write every tweet row to a new file, preceded by a header row. The last
# header entry names the cleaned-text column appended to each row.
with open('search_result_new.csv', mode='w', encoding='utf-8', newline='') as outfile:
    writer = csv.writer(outfile)

    writer.writerow([
        "id", "conversation_id", "created_at", "date", "time", "timezone",
        "user_id", "username", "name", "place", "tweet", "language",
        "mentions", "urls", "photos", "replies_count", "retweets_count",
        "likes_count", "hashtags", "cashtags", "link", "retweet", "quote_url",
        "video", "thumbnail", "near", "geo", "source", "user_rt_id", "user_rt",
        "retweet_id", "reply_to", "retweet_date", "translate", "trans_src",
        "trans_dest", "clean_tweet",
    ])

    # One output line per tweet, in the order the tweets were read.
    writer.writerows(all_tweets_list)

# Build "search_result_filtered.csv": for every tweet that passes the language,
# likes and length filters, store its sentiment scores, engagement counts,
# link, cleaned text and English translation.

# Imported once up front instead of on every loop iteration.
import deep_translator
from deep_translator import GoogleTranslator
from textblob import TextBlob

# One translator instance is reused for all tweets; source='auto' leaves the
# detection of the source language to the translator.
translator = GoogleTranslator(source='auto', target='en')

# mode='w' truncates an existing file, so no separate delete step is needed.
with open('search_result_filtered.csv', mode='w', encoding='utf-8', newline='') as outfile:
    writer = csv.writer(outfile)

    writer.writerow(["subjectivity", "polarity", "replies_count", "retweets_count", "likes_count", "link", "clean_tweet", "translated_tweet"])

    for tweet in all_tweets_list:
        # Column positions follow the header written to "search_result_new.csv".
        language = tweet[11]
        replies_count = tweet[15]
        retweets_count = tweet[16]
        likes_count = tweet[17]
        link = tweet[20]
        clean_tweet = tweet[36]

        # Count real words: split() ignores leading, trailing and repeated
        # whitespace, which split(" ") would count as extra empty "words".
        word_count = len(clean_tweet.split())

        if not (language == target_language and int(likes_count) >= target_min_likes and word_count >= target_min_length):
            continue

        # Translating Turkish tweets to English so as to allow the NLP algorithm to analyze them:
        translated_tweet = translator.translate(clean_tweet)
        if not translated_tweet:
            continue # Nothing came back from the translator, so there is nothing to score.

        # Analyze the translation once and read both scores from the same object.
        blob = TextBlob(translated_tweet)
        subjectivity = blob.subjectivity
        polarity = blob.polarity

        writer.writerow((subjectivity, polarity, replies_count, retweets_count, likes_count, link, clean_tweet, translated_tweet))

print("Successfully completed!")

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the filtered tweets together with their sentiment scores and preview the first rows.
df = pd.read_csv('search_result_filtered.csv')
df.head()