In [45]:
import datetime
from datetime import date
import math
import numpy as np
import pandas as pd
import tensorflow as tf
import re

In [46]:
def clean_tweet(tweet):
    """Strip mentions, URLs and punctuation from a tweet and collapse whitespace.

    In one regex pass the following are each replaced by a space:
      * ``@handle`` mentions (ASCII letters, digits and underscores),
      * any character that is not an ASCII letter, digit, space or tab,
      * ``scheme://...`` URLs.
    The result is then split on whitespace and re-joined with single spaces.

    Args:
        tweet: Raw tweet text. A non-string value (``None``, or the ``NaN``
            pandas stores for a missing cell) is treated as an empty tweet.

    Returns:
        The cleaned text, words separated by single spaces, no leading or
        trailing whitespace.
    """
    if not isinstance(tweet, str):
        return ''
    # Raw string: '\w', '\/' and '\S' are regex escapes, not Python escapes.
    # The handle class includes '_' so '@Andrew_OSU' is removed whole instead
    # of leaving 'OSU' behind in the cleaned text.
    pattern = r'(@[A-Za-z0-9_]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)'
    return ' '.join(re.sub(pattern, ' ', tweet).split())


def str_to_date(string):
    """Parse a ``YYYY-MM-DD`` string and return it as a ``datetime.date``.

    ``strptime`` raises ``ValueError`` when the text does not match the format.
    """
    parsed = datetime.datetime.strptime(string, '%Y-%m-%d')
    return parsed.date()


def print_weights(weights):
    """Print a flat weight list to stdout, two entries per layer.

    Element ``2*k`` is printed as the weight matrix ``W<k>`` and element
    ``2*k + 1`` as the bias ``b<k>``. A trailing unpaired element is ignored.
    """
    # presumably the list returned by model.get_weights() — confirm with caller
    print('\n******* WEIGHTS OF ANN *******\n')
    layer_count = len(weights) // 2
    for layer in range(layer_count):
        kernel_pos = 2 * layer
        print('Weights W%d:\n' % layer, weights[kernel_pos])
        print('Bias b%d:\n' % layer, weights[kernel_pos + 1])


def normalize_column(dataframe, col_name):
    """Min-max scale one column of ``dataframe`` to the range [0, 1], in place.

    Args:
        dataframe: DataFrame that owns the column; it is modified in place.
        col_name: Label of the numeric column to rescale.

    Returns:
        The rescaled column, as now stored in ``dataframe``.

    Missing values are ignored when locating the extremes and stay missing in
    the result. A column whose values are all equal is mapped to 0 instead of
    the NaN a 0/0 division would produce.
    """
    column = dataframe[col_name]
    # Series.min()/max() skip NaN; the builtin min()/max() compare element by
    # element and give order-dependent answers as soon as a NaN is present.
    minimum = column.min()
    span = column.max() - minimum
    if span == 0:
        # Constant column: avoid 0/0 and leave every value at the lower bound.
        span = 1
    dataframe[col_name] = (column - minimum) / span
    return dataframe[col_name]

In [76]:
# Name the input file, announce the stage, then load the raw tweets into `df`.
filename = "Apple_Blog_tweets.csv"  # relative path: resolved against the kernel's working directory
print('\n\n********** CLEANING TWEETS **********\n\n')
df = pd.read_csv(filepath_or_buffer=filename)



********** CLEANING TWEETS **********




In [77]:
# Add a cleaned copy of every tweet; the raw 'text' column is kept as-is.
df["Clean Tweet"] = df['text'].apply(clean_tweet)
# Parse the timestamps, then keep only the calendar date (datetime.date objects).
df['created_at'] = pd.to_datetime(df['created_at']).dt.date

In [78]:
# Remove pandas' row and column display caps so printed frames are not elided.
pd.options.display.max_rows = None
pd.options.display.max_columns = None

In [80]:
# Persist the frame to disk. `index` is left at its default, so the row index
# is written alongside the data columns.
df.to_csv(path_or_buf='clean.csv')

In [79]:
# Dump the whole frame as plain text. The row/column display caps were set to
# None in an earlier cell, so no rows or columns are elided from this output.
print(df)

                                                  text  created_at  \
0    YouTube reportedly testing Picture in Picture ...  2020-08-28   
1    @hildebrand030 @TheRyader Correct, and we talk...  2020-08-28   
2    RT @Andrew_OSU : New photo comparison! We got ...  2020-08-28   
3    Apple is paying $9.75M to settle Powerbeats2 c...  2020-08-28   
4    #Apple has terminated #EpicGames' developer ac...  2020-08-28   
5    #Facebook CEO Mark Zuckerberg took aim at Appl...  2020-08-28   
6    Apple TV+ series 'Invasion' restarts productio...  2020-08-28   
7    Apple holds sales on iTunes content every week...  2020-08-28   
8    Review: SanDisk Extreme Pro CFExpress card rea...  2020-08-28   
9    A new suite of privacy features in #iOS14 has ...  2020-08-28   
10   Check out the new, lower price on #Apple's 16-...  2020-08-28   
11   What #Apple's #PowerPC to #Intel transition ca...  2020-08-28   
12   Future Apple Watch may anticipate workouts whe...  2020-08-28   
13   A new study say

In [51]:
# Stage banner, followed by the terms searched for in the raw tweet text.
# The later `str.contains` calls use pandas' defaults, so matching is
# case-sensitive and substring-based.
print('\n\n********** IDENTIFYING KEY WORDS **********\n\n')
key_words = [
    'Europe', 'China', 'tariff', 'Stock Market', 'economy',
    'bank', 'trade', 'jobs', 'money', 'dollar',
    'currency', 'Xi', 'deal', 'growth',
]



********** IDENTIFYING KEY WORDS **********




In [52]:
# 1 when the raw tweet text contains at least one key word, otherwise 0.
# NOTE(review): despite the name this is a 0/1 flag, not a count of key words — confirm intent.
# Each key word is escaped so it is matched literally inside the alternation.
# na=False makes a missing tweet count as "no match"; without it str.contains
# returns NaN for that row, which np.where treats as truthy.
df['noof_keywords'] = np.where(
    df.text.str.contains('|'.join(map(re.escape, key_words)), na=False), 1, 0)

In [53]:
# Add one 0/1 indicator column per key word, named after the key word itself.
# regex=False matches the key word as a literal substring. na=False counts a
# missing tweet as "no match" instead of letting NaN reach np.where, where it
# would be treated as truthy.
for key_word in key_words:
    df[key_word] = np.where(
        df.text.str.contains(key_word, regex=False, na=False), 1, 0)

In [65]:
# Dates, first as 'YYYY-MM-DD' strings, that the later shifting cell looks for
# in 'created_at'.
invalid_days = [
    '2020-08-03', '2020-08-04', '2020-08-05', '2020-08-06', '2020-08-07',
    '2020-08-10', '2020-08-11', '2020-08-12', '2020-08-13', '2020-08-14',
    '2020-08-17', '2020-08-18', '2020-08-19', '2020-08-20', '2020-08-21',
    '2020-08-24', '2020-08-25', '2020-08-26', '2020-08-27', '2020-08-28',
]
# NOTE(review): [''] holds one empty string, so the count below is 1, not 0 — confirm this is intended.
missing_days = ['']
noof_missing_days = len(missing_days)
# Replace the strings with datetime.date objects so they can be compared with
# the date values stored in 'created_at'.
invalid_days = [str_to_date(day_text) for day_text in invalid_days]

In [66]:
# Sanity check: the list should now hold datetime.date objects, not strings.
print(invalid_days)

[datetime.date(2020, 8, 3), datetime.date(2020, 8, 4), datetime.date(2020, 8, 5), datetime.date(2020, 8, 6), datetime.date(2020, 8, 7), datetime.date(2020, 8, 10), datetime.date(2020, 8, 11), datetime.date(2020, 8, 12), datetime.date(2020, 8, 13), datetime.date(2020, 8, 14), datetime.date(2020, 8, 17), datetime.date(2020, 8, 18), datetime.date(2020, 8, 19), datetime.date(2020, 8, 20), datetime.date(2020, 8, 21), datetime.date(2020, 8, 24), datetime.date(2020, 8, 25), datetime.date(2020, 8, 26), datetime.date(2020, 8, 27), datetime.date(2020, 8, 28)]


In [74]:
# Move every tweet dated on one of `invalid_days` forward by one calendar day.
# This is done in a single pass over the column rather than with iterrows()
# plus per-row .loc writes, which is slow and modifies the frame while it is
# being iterated.
# NOTE: not idempotent — re-running this cell shifts an already-shifted date
# again whenever the new date is itself in `invalid_days`.
one_day = datetime.timedelta(days=1)
days_to_shift = set(invalid_days)  # set membership instead of a list scan per row
df['created_at'] = df['created_at'].map(
    lambda day: day + one_day if day in days_to_shift else day
)

In [75]:
# Dump the frame again to inspect 'created_at' after the one-day shift above.
print(df)

                                                  text  created_at  \
0    YouTube reportedly testing Picture in Picture ...  2020-08-29   
1    @hildebrand030 @TheRyader Correct, and we talk...  2020-08-29   
2    RT @Andrew_OSU : New photo comparison! We got ...  2020-08-29   
3    Apple is paying $9.75M to settle Powerbeats2 c...  2020-08-29   
4    #Apple has terminated #EpicGames' developer ac...  2020-08-29   
5    #Facebook CEO Mark Zuckerberg took aim at Appl...  2020-08-29   
6    Apple TV+ series 'Invasion' restarts productio...  2020-08-29   
7    Apple holds sales on iTunes content every week...  2020-08-29   
8    Review: SanDisk Extreme Pro CFExpress card rea...  2020-08-29   
9    A new suite of privacy features in #iOS14 has ...  2020-08-29   
10   Check out the new, lower price on #Apple's 16-...  2020-08-29   
11   What #Apple's #PowerPC to #Intel transition ca...  2020-08-29   
12   Future Apple Watch may anticipate workouts whe...  2020-08-29   
13   A new study say