# NLP with oil and renewable energies project

## Import libraries

In [2]:
import requests
import re
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB  
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
import argparse
from matplotlib import pyplot as plt
import wordcloud
import requests, json
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import seaborn as sns

## Creating monthly sentiment averages

In [3]:
# Load the three article corpora; index_col=0 uses the first CSV column as the row index.
(
    corpus_oil_price,
    corpus_alternative_energies,
    corpus_crude_oil,
) = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        "data/corpus_oil_price.csv",
        "data/corpus_alternative_energies.csv",
        "data/corpus_crude_oil.csv",
    )
)

In [4]:
# Parse the publication timestamps of every corpus as timezone-aware UTC datetimes.
for corpus in (corpus_oil_price, corpus_alternative_energies, corpus_crude_oil):
    corpus["date_published"] = pd.to_datetime(corpus["date_published"], utc=True)
corpus_crude_oil


Unnamed: 0,Unnamed: 0.1,title,date_published,content,baseline_sentiment,vader_sentiment
0,0,Oil Bulls Rejoice As Biden's Supply Strategy B...,2021-11-25 00:00:00+00:00,President Biden&rsquo;s threat to oil producer...,-1,-0.9926
1,1,"OPEC To Add 400,000 Bpd In January Despite Oil...",2021-12-02 14:53:55+00:00,The OPEC group is sticking to its plan to ease...,0,0.4497
2,2,UAE Pumps $6 Billion Into Oil And Gas Expansio...,2021-11-24 23:00:00+00:00,The U.S.-sponsored &lsquo;relationship normali...,1,0.9875
3,3,China Keeps Markets In The Dark About SPR Release,2021-11-24 22:00:00+00:00,The volume of the expected Chinese release of ...,-1,-0.9552
4,4,Oil Markets Unimpressed By Small Crude Invento...,2021-11-24 15:36:00+00:00,"Crude oil inventories rose last week, while ga...",-1,-0.9601
...,...,...,...,...,...,...
5000,5000,A Detailed Guide on the Many Different Types o...,2009-12-02 23:12:01+00:00,Some people arbitrarily speak about oil as if ...,-1,-0.9962
5001,5001,What is Peak Oil Theory A Thorough Look at Thi...,2009-10-21 21:17:44+00:00,Currently there is a lot of debate going on re...,1,-0.1567
5002,5002,Oil Shale - So Just What Is It,2009-09-24 22:26:55+00:00,People often say &ldquo;You can&rsquo;t squeez...,-1,0.9816
5003,5003,Oil is not a Villain Here's what it's done for us,2009-09-21 21:45:02+00:00,"For decades now, oil has often been vilified a...",1,0.9987


In [6]:
# Average the numeric columns per calendar month of `date_published`.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on text columns (title, content) instead of silently dropping them.
# NOTE(review): newer pandas deprecates the 'M' alias in favour of 'ME' - confirm the target version.
corpus_oil_price_resampled = corpus_oil_price.resample('M', on='date_published').mean(numeric_only=True)
corpus_alternative_energies_resampled = corpus_alternative_energies.resample('M', on='date_published').mean(numeric_only=True)
corpus_crude_oil_resampled = corpus_crude_oil.resample('M', on='date_published').mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 0.1,baseline_sentiment,vader_sentiment
date_published,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-07-31 00:00:00+00:00,5004.000000,-1.000000,-0.987900
2009-08-31 00:00:00+00:00,,,
2009-09-30 00:00:00+00:00,5002.500000,0.000000,0.990150
2009-10-31 00:00:00+00:00,5001.000000,1.000000,-0.156700
2009-11-30 00:00:00+00:00,,,
...,...,...,...
2021-08-31 00:00:00+00:00,227.863636,-0.090909,0.196308
2021-09-30 00:00:00+00:00,153.280000,-0.120000,0.031880
2021-10-31 00:00:00+00:00,88.098039,-0.098039,0.201992
2021-11-30 00:00:00+00:00,58.539683,-0.079365,0.058449


In [8]:
# Swap each timestamp in the monthly index for a 'YYYY-MM' text label.
for monthly in (
    corpus_oil_price_resampled,
    corpus_alternative_energies_resampled,
    corpus_crude_oil_resampled,
):
    monthly.index = pd.to_datetime(monthly.index).strftime('%Y-%m')
corpus_crude_oil_resampled

Unnamed: 0_level_0,Unnamed: 0.1,baseline_sentiment,vader_sentiment
date_published,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-07,5004.000000,-1.000000,-0.987900
2009-08,,,
2009-09,5002.500000,0.000000,0.990150
2009-10,5001.000000,1.000000,-0.156700
2009-11,,,
...,...,...,...
2021-08,227.863636,-0.090909,0.196308
2021-09,153.280000,-0.120000,0.031880
2021-10,88.098039,-0.098039,0.201992
2021-11,58.539683,-0.079365,0.058449


### Regression with oil prices (time series analysis)

In [10]:
corpus_oil_price_resampled

Unnamed: 0_level_0,baseline_sentiment,vader_sentiment
date_published,Unnamed: 1_level_1,Unnamed: 2_level_1
2009-11,1.000000,0.998550
2009-12,-1.000000,0.098200
2010-01,0.000000,0.128680
2010-02,0.000000,0.567283
2010-03,0.111111,0.848289
...,...,...
2021-08,-0.560000,-0.545292
2021-09,-0.250000,-0.237639
2021-10,-0.476190,-0.447929
2021-11,-0.800000,-0.169070


In [14]:
# Count the missing values in each column of the resampled oil-price frame.
corpus_oil_price_resampled.isna().sum()

baseline_sentiment    4
vader_sentiment       4
dtype: int64

In [16]:
# Show the months for which at least one column has no value.
is_NaN = corpus_oil_price_resampled.isna()
row_has_NaN = is_NaN.any(axis="columns")
rows_with_NaN = corpus_oil_price_resampled.loc[row_has_NaN]
rows_with_NaN

Unnamed: 0_level_0,baseline_sentiment,vader_sentiment
date_published,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-11,,
2013-05,,
2013-07,,
2014-01,,


In [None]:
1662/27:
# Load the three corpora. index_col=0 is applied to every file so the saved
# row index does not come back as a stray "Unnamed: 0" data column.
# NOTE(review): assumes all three CSVs were written with their index, like oil_price.csv - confirm.
corpus_oil_price=pd.read_csv("oil_price.csv", index_col=0)
corpus_alternative_energies=pd.read_csv("alternative_energies.csv", index_col=0)
corpus_crude_oil=pd.read_csv("crude_oil.csv", index_col=0)
1662/28: corpus_oil_price
1662/29:
def sentiment_analizer(pos_list,neg_list,text):
    """Classify a string by counting its positive and negative words.

        The text is split on whitespace and each token is compared exactly
        (case and punctuation included) with the entries of the two lexicons.
        Repeated tokens are counted every time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: "positive" when positive matches outnumber negative ones,
        "negative" in the opposite case, "neutral" on a tie"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # No debug printing here: the function runs once per article, so printing
    # the matched words flooded the notebook output.
    if pos_count > neg_count:
        return "positive"
    if pos_count < neg_count:
        return "negative"
    return "neutral"
1662/30:
# Load the three corpora. index_col=0 is applied to every file so the saved
# row index does not come back as a stray "Unnamed: 0" data column.
# NOTE(review): assumes all three CSVs were written with their index, like oil_price.csv - confirm.
corpus_oil_price=pd.read_csv("oil_price.csv", index_col=0)
corpus_alternative_energies=pd.read_csv("alternative_energies.csv", index_col=0)
corpus_crude_oil=pd.read_csv("crude_oil.csv", index_col=0)
corpus_oil_price
1662/31: col_one_list = corpus_oil_price['content'].tolist()
1662/32:
col_one_list = corpus_oil_price['content'].tolist()
col_one_list
1662/33:
col_one_list = corpus_oil_price['content'].tolist()
col_one_list_2 = corpus_alternative_energies['content'].tolist()
col_one_list_3 = corpus_crude_oil['content'].tolist()
1662/34:
# Label every article once. Fixes the `for i in in` syntax error and drops the
# duplicate call whose result was thrown away.
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/35:
# Label every article exactly once (the original ran the analyser twice per
# article and discarded the first result).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1663/1:
import os
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from PIL import Image
import numpy as np
1663/2: Image.open('emoji.png')
1663/3:
# Scan the working directory for the lexicon files (matched by filename prefix)
# and load each one as a whitespace-separated word list.
for file in os.listdir('./'):
    if not file.startswith(('pos', 'nega')):
        continue
    with open(file, mode='r', encoding='latin1') as lexicon:
        words = lexicon.read().split()
    if file.startswith('pos'):
        pos_list = words
    else:
        neg_list = words
1663/4: len(neg_list),len(pos_list)
1663/5: text = "It is a beautiful shitty day"
1663/6:
# Plain for/if version: put each token of `text` into the positive or the
# negative bucket (a token found in pos_list is not checked against neg_list).

pos_words = []
neg_words = []
for word in text.split():
    if word in pos_list:
        pos_words.append(word)
    elif word in neg_list:
        neg_words.append(word)  # was `words`, an undefined name
1663/7:
## Efficient way by using list comprehension

pos_words = [word for word in text.split() if word in pos_list]
neg_words = [word for word in text.split() if word in neg_list]
1663/8: pos_words, neg_words
1663/9:
# Report the overall polarity from the two match lists; a tie counts as neutral.
if len(pos_words) > len(neg_words):
    print('positive')
elif len(pos_words) < len(neg_words):
    print('negative')
else:
    print('neutral')  # typo fixed: used to print 'netral'
1662/36:
# Load the positive and negative word lists from files in the current
# directory, recognised by their filename prefix.
for file in os.listdir('./'):
    wanted_pos = file.startswith('pos')
    wanted_neg = not wanted_pos and file.startswith('nega')
    if wanted_pos or wanted_neg:
        with open(file, mode='r', encoding='latin1') as handle:
            words = handle.read().split()
        if wanted_pos:
            pos_list = words
        else:
            neg_list = words
1662/37:
import requests
import re
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB  
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
import argparse
from matplotlib import pyplot as plt
import wordcloud
import requests, json
import os
1662/38:
# Scan the working directory for the lexicon files (matched by filename prefix)
# and load each one as a whitespace-separated word list.
for file in os.listdir('./'):
    if not file.startswith(('pos', 'nega')):
        continue
    with open(file, mode='r', encoding='latin1') as lexicon:
        words = lexicon.read().split()
    if file.startswith('pos'):
        pos_list = words
    else:
        neg_list = words
1662/39:
def sentiment_analizer(pos_list,neg_list,text):
    """Classify a string by counting its positive and negative words.

        The text is split on whitespace and each token is compared exactly
        (case and punctuation included) with the entries of the two lexicons.
        Repeated tokens are counted every time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: "positive" when positive matches outnumber negative ones,
        "negative" in the opposite case, "neutral" on a tie"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # No debug printing here: the function runs once per article, so printing
    # the matched words flooded the notebook output.
    if pos_count > neg_count:
        return "positive"
    if pos_count < neg_count:
        return "negative"
    return "neutral"
1662/40:
# Label every article exactly once (the original ran the analyser twice per
# article and discarded the first result).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/41:
def sentiment_analizer(pos_list,neg_list,text):
    """Classify a string by counting its positive and negative words.

        Tokens come from splitting the text on whitespace and are matched
        exactly (case and punctuation included); repeated tokens count each
        time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: "positive", "negative" or "neutral" (neutral on a tie)"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # Debug prints removed: they produced two output lines per article.
    if pos_count > neg_count:
        result = "positive"
    elif pos_count < neg_count:
        result = "negative"
    else:
        result = "neutral"
    return result
1662/42:
# Collect the value returned for each article. `result` only exists inside
# sentiment_analizer, so appending it here fails with NameError on a fresh
# kernel (or silently reuses a stale global).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/43:
def sentiment_analizer(pos_list,neg_list,text):
    """Classify a string by counting its positive and negative words.

        Tokens come from splitting the text on whitespace and are matched
        exactly (case and punctuation included); repeated tokens count each
        time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: "positive", "negative" or "neutral" (neutral on a tie)"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # Debug prints removed: they produced two output lines per article.
    if pos_count > neg_count:
        result = "positive"
    elif pos_count < neg_count:
        result = "negative"
    else:
        result = "neutral"
    return result
1662/44:
# Collect the value returned for each article. `result` only exists inside
# sentiment_analizer, so appending it here fails with NameError on a fresh
# kernel (or silently reuses a stale global).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/45:
def sentiment_analizer(pos_list,neg_list,text):
    """Classify a string by counting its positive and negative words.

        Tokens come from splitting the text on whitespace and are matched
        exactly (case and punctuation included); repeated tokens count each
        time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: a one-element list holding "positive", "negative" or
        "neutral" (neutral on a tie)"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    result = []
    # append is a method: it must be called with (), not subscripted with [],
    # which raised TypeError on every call.
    if pos_count > neg_count:
        result.append("positive")
    elif pos_count < neg_count:
        result.append("negative")
    else:
        result.append("neutral")
    return result
1662/46:
# Collect the value returned for each article. `result` only exists inside
# sentiment_analizer, so appending it here fails with NameError on a fresh
# kernel (or silently reuses a stale global).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/47:
def sentiment_analizer(pos_list,neg_list,text):
    """Score a string by counting its positive and negative words.

        Tokens come from splitting the text on whitespace and are matched
        exactly (case and punctuation included); repeated tokens count each
        time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: 1 when positive matches outnumber negative ones, -1 in the
        opposite case, 0 on a tie"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # The original initialised `result = 0` and then tried `result.append[...]`,
    # which fails on an int. A numeric score is returned instead.
    # NOTE(review): numeric output is assumed from the `result = 0` initialiser - confirm.
    if pos_count > neg_count:
        return 1
    if pos_count < neg_count:
        return -1
    return 0
1662/48:
# Collect the value returned for each article. `result` only exists inside
# sentiment_analizer, so appending it here fails with NameError on a fresh
# kernel (or silently reuses a stale global).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/49:
def sentiment_analizer(pos_list,neg_list,text):
    """Score a string by counting its positive and negative words.

        Tokens come from splitting the text on whitespace and are matched
        exactly (case and punctuation included); repeated tokens count each
        time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: 1 when positive matches outnumber negative ones, -1 in the
        opposite case, 0 on a tie"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # Debug prints removed: they produced two output lines per article.
    if pos_count > neg_count:
        return 1
    if pos_count < neg_count:
        return -1
    return 0
1662/50:
# Collect the value returned for each article. `result` only exists inside
# sentiment_analizer, so appending it here fails with NameError on a fresh
# kernel (or silently reuses a stale global).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/51:
def sentiment_analizer(pos_list,neg_list,text):
    """Score a string by counting its positive and negative words.

        Tokens come from splitting the text on whitespace and are matched
        exactly (case and punctuation included); repeated tokens count each
        time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: 1 when positive matches outnumber negative ones, -1 in the
        opposite case, 0 on a tie"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # Every return now sits inside the function body; the original `return`
    # was dedented to column 0, which is a SyntaxError.
    if pos_count > neg_count:
        return 1
    if pos_count < neg_count:
        return -1
    return 0
1662/52:
def sentiment_analizer(pos_list,neg_list,text):
    """Score a string by counting its positive and negative words.

        Tokens come from splitting the text on whitespace and are matched
        exactly (case and punctuation included); repeated tokens count each
        time they occur.

        params:
        pos_list - collection of positive words
        neg_list - collection of negative words
        text - the string to be assessed

        returns: 1 when positive matches outnumber negative ones, -1 in the
        opposite case, 0 on a tie"""

    # Hash-based lookup: the original scanned each word list once per token.
    pos_lookup = pos_list if isinstance(pos_list, (set, frozenset)) else set(pos_list)
    neg_lookup = neg_list if isinstance(neg_list, (set, frozenset)) else set(neg_list)
    tokens = text.split()
    pos_count = sum(1 for word in tokens if word in pos_lookup)
    neg_count = sum(1 for word in tokens if word in neg_lookup)
    # Debug prints removed: they produced two output lines per article.
    if pos_count > neg_count:
        return 1
    if pos_count < neg_count:
        return -1
    return 0
1662/53:
# Collect the value returned for each article. `result` only exists inside
# sentiment_analizer, so appending it here fails with NameError on a fresh
# kernel (or silently reuses a stale global).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/54:
# Store each article's score. The original appended the function object
# `sentiment_analizer` itself instead of the value it returned.
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/55:
# Collect one score per article. The original loop discarded every return
# value (its append was commented out), so sentiment_list stayed empty.
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/56:
# Collect the value returned for each article. `result` only exists inside
# sentiment_analizer, so appending it here fails with NameError on a fresh
# kernel (or silently reuses a stale global).
sentiment_list = []
for i in col_one_list:
    sentiment_list.append(sentiment_analizer(pos_list,neg_list,i))
1662/57:
# Baseline lexicon score per article: +1 when positive words outnumber
# negative ones, -1 when the reverse holds, 0 on a tie.
# Two fixes: each article `i` is scored (the original split the unrelated
# variable `text`), and the score is computed fresh per article instead of
# accumulating in a `result` initialised once before the loop.
pos_lookup, neg_lookup = set(pos_list), set(neg_list)  # O(1) membership tests
sentiment_list = []
for i in col_one_list:
    tokens = i.split()
    n_pos = sum(1 for word in tokens if word in pos_lookup)
    n_neg = sum(1 for word in tokens if word in neg_lookup)
    result = (n_pos > n_neg) - (n_pos < n_neg)
    sentiment_list.append(result)
1662/58:
# Baseline lexicon score per article: +1 when positive words outnumber
# negative ones, -1 when the reverse holds, 0 on a tie.
# The score is computed fresh for every article; the original initialised
# `result` once before the loop, so the list held a running total.
pos_lookup, neg_lookup = set(pos_list), set(neg_list)  # O(1) membership tests
sentiment_list = []
for i in col_one_list:
    tokens = i.split()
    n_pos = sum(1 for word in tokens if word in pos_lookup)
    n_neg = sum(1 for word in tokens if word in neg_lookup)
    result = (n_pos > n_neg) - (n_pos < n_neg)
    sentiment_list.append(result)
1662/59: sentiment_list
1662/60:
# Baseline lexicon score for every article in col_one_list: +1 when positive
# words outnumber negative ones, -1 when the reverse holds, 0 on a tie.
# Sets replace the per-token scans of the word lists, and the no-op
# `result += 0` branch is gone.
pos_lookup, neg_lookup = set(pos_list), set(neg_list)
sentiment_list = []
for i in col_one_list:
    tokens = i.split()
    n_pos = sum(1 for word in tokens if word in pos_lookup)
    n_neg = sum(1 for word in tokens if word in neg_lookup)
    result = (n_pos > n_neg) - (n_pos < n_neg)
    sentiment_list.append(result)
1662/61: sentiment_list
1662/62:
# Baseline lexicon score for every article in col_one_list_2: +1 when positive
# words outnumber negative ones, -1 when the reverse holds, 0 on a tie.
# The list is created under the same name it is appended to; the original
# created `sentiment_list2` but appended to `sentiment_list_2`.
pos_lookup, neg_lookup = set(pos_list), set(neg_list)  # O(1) membership tests
sentiment_list_2 = []
for i in col_one_list_2:
    tokens = i.split()
    n_pos = sum(1 for word in tokens if word in pos_lookup)
    n_neg = sum(1 for word in tokens if word in neg_lookup)
    result = (n_pos > n_neg) - (n_pos < n_neg)
    sentiment_list_2.append(result)
1662/63:
# Baseline lexicon score for every article in col_one_list_2: +1 when positive
# words outnumber negative ones, -1 when the reverse holds, 0 on a tie.
# Sets replace the per-token scans of the word lists, and the no-op
# `result += 0` branch is gone.
pos_lookup, neg_lookup = set(pos_list), set(neg_list)
sentiment_list_2 = []
for i in col_one_list_2:
    tokens = i.split()
    n_pos = sum(1 for word in tokens if word in pos_lookup)
    n_neg = sum(1 for word in tokens if word in neg_lookup)
    result = (n_pos > n_neg) - (n_pos < n_neg)
    sentiment_list_2.append(result)
1662/64:
# Baseline lexicon score for every article in col_one_list_3: +1 when positive
# words outnumber negative ones, -1 when the reverse holds, 0 on a tie.
# Sets replace the per-token scans of the word lists, and the no-op
# `result += 0` branch is gone.
pos_lookup, neg_lookup = set(pos_list), set(neg_list)
sentiment_list_3 = []
for i in col_one_list_3:
    tokens = i.split()
    n_pos = sum(1 for word in tokens if word in pos_lookup)
    n_neg = sum(1 for word in tokens if word in neg_lookup)
    result = (n_pos > n_neg) - (n_pos < n_neg)
    sentiment_list_3.append(result)
1662/65:
corpus_oil_price['baseline_sentiment'] = sentiment_list
corpus_alternative_energies['baseline_sentiment'] = sentiment_list_2
corpus_crude_oil['baseline_sentiment'] = sentiment_list_3
1662/66:
corpus_oil_price['baseline_sentiment'] = sentiment_list
corpus_alternative_energies['baseline_sentiment'] = sentiment_list_2
corpus_crude_oil['baseline_sentiment'] = sentiment_list_3
corpus_oil_price
1662/67:
corpus_oil_price['baseline_sentiment'] = sentiment_list
corpus_alternative_energies['baseline_sentiment'] = sentiment_list_2
corpus_crude_oil['baseline_sentiment'] = sentiment_list_3
corpus_alternative_energies
1662/68:
corpus_oil_price['baseline_sentiment'] = sentiment_list
corpus_alternative_energies['baseline_sentiment'] = sentiment_list_2
corpus_crude_oil['baseline_sentiment'] = sentiment_list_3
corpus_crude_oil
1662/69: pip install vaderSentiment
1662/70:
import requests
import re
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB  
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
import argparse
from matplotlib import pyplot as plt
import wordcloud
import requests, json
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
1662/71: s = SentimentIntensityAnalyzer()
1662/72: len(s.lexicon)
1662/73: s.polarity_scores('I TOTALLY!! like this library ')
1662/74: s.polarity_scores('I TOTALLY!! like this library ')['compound']
1662/75:
# Compound VADER score for each test sentence.
test_result_list = []
for t in test_list:
    vader = s.polarity_scores(t)['compound']  # was 't) - an unterminated string literal
    test_result_list.append(vader)
1662/76:
test_result_list = []
for t in test_list:
    vader = s.polarity_scores(t)['compound']
    test_result_list.append(vader)
1662/77: test_list = ['I TOTALLY!! like this library ', 'this library totally sucks']
1662/78:
test_result_list = []
for t in test_list:
    vader = s.polarity_scores(t)['compound']
    test_result_list.append(vader)
1662/79:
test_result_list = []
for t in test_list:
    vader = s.polarity_scores(t)['compound']
    test_result_list.append(vader)
test_result_list
1662/80:
# Compound VADER score for every article in col_one_list.
vader_result_list = []
for t in col_one_list:
    vader = s.polarity_scores(t)['compound']
    # Append to the list created above; the original appended to
    # test_result_list and left vader_result_list empty.
    vader_result_list.append(vader)
vader_result_list
1662/81:
# Compound VADER score for every article in col_one_list.
vader_result_list = [s.polarity_scores(article)['compound'] for article in col_one_list]
vader_result_list
1662/82:
# Compound VADER score for every article in col_one_list_2.
vader_result_list_2 = [s.polarity_scores(article)['compound'] for article in col_one_list_2]
1662/83:
# Compound VADER score for every article in col_one_list_3.
vader_result_list_3 = [s.polarity_scores(article)['compound'] for article in col_one_list_3]
1662/84:
corpus_oil_price['vader_sentiment'] = vader_result_list
corpus_alternative_energies['vader_sentiment'] = vader_result_list_2
corpus_crude_oil['vader_sentiment'] = vader_result_list_3
1662/85: corpus_oil_price
1662/86:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"])
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"])
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"])
1662/87:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"])
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"])
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"])
corpus_crude_oil
1662/88:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], utc=True)
corpus_crude_oil
1662/89: corpus_crude_oil.resample('M', on='date_published').mean()
1662/90: data_crude = corpus_crude_oil.resample('M', on='date_published').mean()
1662/91:
data_crude = corpus_crude_oil.resample('M', on='date_published').mean()
data_crude
1662/92:
# Monthly means of the numeric columns, then an "MM/YY" label for every month.
# resample(on='date_published') moves that column into the index, so the
# labels are built from the index; indexing it as a column raised KeyError.
# numeric_only=True keeps mean() from failing on the text columns in pandas >= 2.0.
data_crude = corpus_crude_oil.resample('M', on='date_published').mean(numeric_only=True)
data_crude2 = data_crude.index.strftime("%m/%y")
1662/93:
# Monthly means of the numeric columns, then an "MM/YYYY" label for every month.
# resample(on='date_published') moves that column into the index, so the
# labels are built from the index; indexing it as a column raised KeyError.
# numeric_only=True keeps mean() from failing on the text columns in pandas >= 2.0.
data_crude = corpus_crude_oil.resample('M', on='date_published').mean(numeric_only=True)
data_crude2 = data_crude.index.strftime("%m/%Y")
1662/94:
corpus_crude_oil.resample('M', on='date_published').mean()
data_crude = corpus_crude_oil["date_published"].dt.strftime("%m/%Y")
1662/95:
corpus_crude_oil.resample('M', on='date_published').mean()
data_crude = corpus_crude_oil["date_published"].dt.strftime("%m/%Y")
data_crude
1662/96:
corpus_crude_oil.resample('M', on='date_published').mean()
corpus_crude_oil
1662/97: corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
1662/98:
corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
corpus_crude_oil_resampled
1662/99:
# Format the publication dates as "MM/YY" strings. The column is selected with
# single brackets so the result is a Series: the `.dt` accessor does not exist
# on the one-column DataFrame that `[["date_published"]]` returns.
corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
corpus_crude_oil_resampled
1662/100:
corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
corpus_crude_oil_resampled
1662/101:
corpus_oil_price_resampled = corpus_oil_price["date_published"].dt.strftime("%m/%y")
corpus_alternative_energies_resampled = corpus_alternative_energies["date_published"].dt.strftime("%m/%y")
corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
1662/102:
corpus_oil_price_resampled = corpus_oil_price["date_published"].dt.strftime("%m/%y")
corpus_alternative_energies_resampled = corpus_alternative_energies["date_published"].dt.strftime("%m/%y")
corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
corpus_crude_oil_resampled
1662/103:
corpus_oil_price['date_published_2'] = corpus_oil_price["date_published"].dt.strftime("%m/%y")
#corpus_alternative_energies_resampled = corpus_alternative_energies["date_published"].dt.strftime("%m/%y")
#corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
corpus_oil_price
1662/104:
corpus_oil_price['date_published_2'] = corpus_oil_price["date_published"].dt.strftime("%m/%Y")
#corpus_alternative_energies_resampled = corpus_alternative_energies["date_published"].dt.strftime("%m/%y")
#corpus_crude_oil_resampled = corpus_crude_oil["date_published"].dt.strftime("%m/%y")
corpus_oil_price
1662/105:
# Add a "MM/YYYY" month label to every corpus. All three use the four-digit
# year; the original mixed "%m/%Y" and "%m/%y", giving incompatible labels.
corpus_oil_price['date_published_2'] = corpus_oil_price["date_published"].dt.strftime("%m/%Y")
corpus_alternative_energies['date_published_2'] = corpus_alternative_energies["date_published"].dt.strftime("%m/%Y")
corpus_crude_oil['date_published_2'] = corpus_crude_oil["date_published"].dt.strftime("%m/%Y")
corpus_crude_oil
1662/106:
corpus_oil_price['date_published_2'] = corpus_oil_price["date_published"].dt.strftime("%m/%Y")
corpus_alternative_energies['date_published_2'] = corpus_alternative_energies["date_published"].dt.strftime("%m/%Y")
corpus_crude_oil['date_published_2'] = corpus_crude_oil["date_published"].dt.strftime("%m/%Y")
corpus_crude_oil
1662/107:
# Monthly means of the numeric columns. Resampling uses the datetime column
# `date_published`: `date_published_2` holds the strings produced by strftime,
# and resample() needs datetime-like values.
# numeric_only=True keeps mean() from failing on the text columns in pandas >= 2.0.
corpus_oil_price_resampled = corpus_oil_price.resample('M', on='date_published').mean(numeric_only=True)
corpus_alternative_energies_resampled = corpus_alternative_energies.resample('M', on='date_published').mean(numeric_only=True)
corpus_crude_oil_resampled = corpus_crude_oil.resample('M', on='date_published').mean(numeric_only=True)
corpus_crude_oil
1662/108: corpus_oil_price.dtypes
1662/109:
corpus_oil_price["date_published_2"] = pd.to_datetime(corpus_oil_price["date_published_2"], utc=True)
corpus_alternative_energies["date_published_2"] = pd.to_datetime(corpus_alternative_energies["date_published_2"], utc=True)
corpus_crude_oil["date_published_2"] = pd.to_datetime(corpus_crude_oil["date_published_2"], utc=True)
1662/110:
corpus_oil_price_resampled = corpus_oil_price.resample('M', on='date_published_2').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.resample('M', on='date_published_2').mean()
corpus_crude_oil_resampled = corpus_crude_oil.resample('M', on='date_published_2').mean()
corpus_crude_oil
1662/111:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], format='%Y%m', utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], format='%Y%m', utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], format='%Y%m', utc=True)
corpus_crude_oil
1662/112:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], format='%Y%m', utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], format='%Y%m', utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], format='%Y%m', utc=True)
corpus_crude_oil
1665/1:
import requests
import re
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB  
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
import argparse
from matplotlib import pyplot as plt
import wordcloud
import requests, json
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
1665/2:
# Load the three raw article corpora from CSV files in the working directory.
# Only the oil-price file is read with index_col=0; the other two keep their
# first CSV column as a regular data column.
# NOTE(review): that is presumably where the 'Unnamed: 0' column dropped in
# later cells comes from - confirm before harmonising the three calls.
corpus_oil_price=pd.read_csv("oil_price.csv", index_col=0)
corpus_alternative_energies=pd.read_csv("alternative_energies.csv")
corpus_crude_oil=pd.read_csv("crude_oil.csv")
corpus_oil_price
1665/3:
# Materialise the 'content' column of each corpus as a plain Python list so
# the sentiment scorers below can iterate over the article texts directly.
col_one_list = corpus_oil_price['content'].to_list()
col_one_list_2 = corpus_alternative_energies['content'].to_list()
col_one_list_3 = corpus_crude_oil['content'].to_list()
1665/4:
# Build the positive / negative word lists from the working directory.
# A file whose name starts with 'pos' fills pos_list, one starting with
# 'nega' fills neg_list; every other directory entry is ignored. Each file is
# read whole (latin1) and split on whitespace into a list of tokens.
for file in os.listdir('./'):
    is_positive = file.startswith('pos')
    if not is_positive and not file.startswith('nega'):
        continue
    with open(file, mode='r', encoding='latin1') as handle:
        words = handle.read().split()
    if is_positive:
        pos_list = words
    else:
        neg_list = words
1665/5:
# Lexicon baseline for the oil-price corpus: each article is labelled
#   +1 when it contains more positive-lexicon tokens than negative ones,
#   -1 when negative tokens dominate, and 0 on a tie.
# The word lists are converted to sets once, so every membership test is a
# hash lookup instead of a scan through the whole lexicon.
pos_lookup = set(pos_list)
neg_lookup = set(neg_list)
sentiment_list = []
for text in col_one_list:
    tokens = text.split()
    n_pos = sum(token in pos_lookup for token in tokens)
    n_neg = sum(token in neg_lookup for token in tokens)
    # (a > b) - (a < b) is the integer sign of (a - b): 1, -1 or 0.
    sentiment_list.append((n_pos > n_neg) - (n_pos < n_neg))
1665/6:
# Lexicon baseline for the alternative-energies corpus: +1 / -1 / 0 per
# article depending on whether positive or negative lexicon tokens are in the
# majority (0 on a tie). Sets replace the raw word lists for the membership
# tests so each token is checked in constant time.
pos_lookup = set(pos_list)
neg_lookup = set(neg_list)
sentiment_list_2 = []
for text in col_one_list_2:
    tokens = text.split()
    n_pos = sum(token in pos_lookup for token in tokens)
    n_neg = sum(token in neg_lookup for token in tokens)
    # Sign of the count difference, expressed without branching.
    sentiment_list_2.append((n_pos > n_neg) - (n_pos < n_neg))
1665/7:
# Lexicon baseline for the crude-oil corpus: an article scores +1 when its
# positive-lexicon tokens outnumber the negative ones, -1 in the opposite
# case and 0 when both counts are equal. Lexicons are looked up through sets
# to avoid rescanning the full word lists for every token.
pos_lookup = set(pos_list)
neg_lookup = set(neg_list)
sentiment_list_3 = []
for text in col_one_list_3:
    tokens = text.split()
    n_pos = sum(token in pos_lookup for token in tokens)
    n_neg = sum(token in neg_lookup for token in tokens)
    # bool - bool yields an int in {-1, 0, 1}.
    sentiment_list_3.append((n_pos > n_neg) - (n_pos < n_neg))
1665/8:
corpus_oil_price['baseline_sentiment'] = sentiment_list
corpus_alternative_energies['baseline_sentiment'] = sentiment_list_2
corpus_crude_oil['baseline_sentiment'] = sentiment_list_3
1665/9: s = SentimentIntensityAnalyzer()
1665/10: len(s.lexicon)
1665/11:
# VADER pass over the oil-price articles: keep only the 'compound' entry of
# each polarity_scores() result, in article order.
vader_result_list = [s.polarity_scores(article)['compound'] for article in col_one_list]
1665/12:
# VADER pass over the alternative-energies articles: one 'compound' score per
# article, collected in the same order as col_one_list_2.
vader_result_list_2 = [s.polarity_scores(article)['compound'] for article in col_one_list_2]
1665/13:
# VADER pass over the crude-oil articles: extract the 'compound' value of
# polarity_scores() for every entry of col_one_list_3.
vader_result_list_3 = [s.polarity_scores(article)['compound'] for article in col_one_list_3]
1665/14:
corpus_oil_price['vader_sentiment'] = vader_result_list
corpus_alternative_energies['vader_sentiment'] = vader_result_list_2
corpus_crude_oil['vader_sentiment'] = vader_result_list_3
1665/15: corpus_oil_price
1665/16:
# Turn the `date_published` strings into timezone-aware UTC datetimes.
# format='%Y%m' was removed: that pattern only matches a bare "YYYYMM" value
# and is rejected for full date/time strings, so the layout is inferred.
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], utc=True)
corpus_crude_oil
1665/17:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], utc=True)
1665/18:
corpus_oil_price['date_published'] = corpus_oil_price["date_published"].dt.strftime("%m/%Y")
corpus_alternative_energies['date_published'] = corpus_alternative_energies["date_published"].dt.strftime("%m/%Y")
corpus_crude_oil['date_published'] = corpus_crude_oil["date_published"].dt.strftime("%m/%Y")
1665/19:
# Overwrite `date_published` in each corpus with its "MM/YYYY" text form and
# display the crude-oil frame.
# NOTE(review): this cell is not idempotent. `.dt` needs datetime-like values,
# and the assignment replaces the column with plain strings, so running the
# same statements a second time (as this replay of the previous entry appears
# to do) is expected to fail on the `.dt` accessor - confirm, and re-parse
# with pd.to_datetime before formatting if a re-run must be supported.
corpus_oil_price['date_published'] = corpus_oil_price["date_published"].dt.strftime("%m/%Y")
corpus_alternative_energies['date_published'] = corpus_alternative_energies["date_published"].dt.strftime("%m/%Y")
corpus_crude_oil['date_published'] = corpus_crude_oil["date_published"].dt.strftime("%m/%Y")
corpus_crude_oil
1665/20:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], utc=True)
corpus_crude_oil
1665/21:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"].dt.strftime('%Y-%m'), utc=True)
1665/22:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_crude_oil
1665/23:
# Monthly mean of the numeric (sentiment) columns of each corpus, indexed by
# `date_published`.
# The numeric columns are selected explicitly before averaging: the corpora
# also carry text columns (e.g. title, content) that have no mean, and newer
# pandas releases raise on them instead of silently dropping them.
corpus_oil_price_resampled = corpus_oil_price.set_index('date_published').select_dtypes(include='number').resample('M').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.set_index('date_published').select_dtypes(include='number').resample('M').mean()
corpus_crude_oil_resampled = corpus_crude_oil.set_index('date_published').select_dtypes(include='number').resample('M').mean()
corpus_crude_oil
1665/24:
corpus_oil_price_resampled = corpus_oil_price.resample('M', on='date_published').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.resample('M', on='date_published').mean()
corpus_crude_oil_resampled = corpus_crude_oil.resample('M', on='date_published').mean()
corpus_crude_oil_resampled
1665/25:
corpus_oil_price_resampled = corpus_oil_price.resample('M', on='date_published').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.resample('M', on='date_published').mean()
corpus_crude_oil_resampled = corpus_crude_oil.resample('M', on='date_published').mean()
corpus_crude_oil_resampled.dtypes
1665/26:
corpus_oil_price_resampled = corpus_oil_price.resample('M', on='date_published').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.resample('M', on='date_published').mean()
corpus_crude_oil_resampled = corpus_crude_oil.resample('M', on='date_published').mean()
corpus_crude_oil_resampled
1665/27: corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y-%m')
1665/28:
corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y-%m')
corpus_oil_price_resampled
1665/29:
#corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y-%m')
corpus_alternative_energies_resampled.index = pd.to_datetime(corpus_alternative_energies_resampled.index).strftime('%Y-%m')
corpus_crude_oil_resampled.index = pd.to_datetime(corpus_crude_oil_resampled.index).strftime('%Y-%m')
1665/30: corpus_crude_oil_resampled
1665/31: corpus_alternative_energies_resampled
1665/32:
import requests
import re
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB  
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
import argparse
from matplotlib import pyplot as plt
import wordcloud
import requests, json
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import seaborn as sns
1665/33: sns.lineplot(data=corpus_oil_price_resampled)
1665/34:
sns.set(rc = {'figure.figsize':(15,8)})
sns.lineplot(data=corpus_oil_price_resampled)
1665/35:
# Yearly mean of the numeric columns of each corpus, keyed by
# `date_published`. Text columns are excluded up front via select_dtypes so
# the aggregation does not depend on pandas silently discarding them.
corpus_oil_price_resampled = corpus_oil_price.set_index('date_published').select_dtypes(include='number').resample('Y').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.set_index('date_published').select_dtypes(include='number').resample('Y').mean()
corpus_crude_oil_resampled = corpus_crude_oil.set_index('date_published').select_dtypes(include='number').resample('Y').mean()
1665/36: corpus_crude_oil_resampled
1665/37:
corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y')
corpus_alternative_energies_resampled.index = pd.to_datetime(corpus_alternative_energies_resampled.index).strftime('%Y')
corpus_crude_oil_resampled.index = pd.to_datetime(corpus_crude_oil_resampled.index).strftime('%Y')
1665/38:
sns.set(rc = {'figure.figsize':(15,8)})
sns.lineplot(data=corpus_oil_price_resampled)
1665/39:
sns.set(rc = {'figure.figsize':(15,8)})
sns.lineplot(data=corpus_alternative_energies_resampled)
1665/40:
corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y')
corpus_alternative_energies_resampled.index = pd.to_datetime(corpus_alternative_energies_resampled.index).strftime('%Y')
corpus_crude_oil_resampled.index = pd.to_datetime(corpus_crude_oil_resampled.index).strftime('%Y')
corpus_alternative_energies_resampled
1665/41:
# Remove the 'Unnamed: 0' helper column from the resampled alternative-energies
# frame, then draw every remaining column as a line.
# errors='ignore' keeps the cell re-runnable: the drop is in place, so on a
# second execution the column is already gone and would otherwise raise KeyError.
corpus_alternative_energies_resampled.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')
sns.set(rc={'figure.figsize': (15, 8)})
sns.lineplot(data=corpus_alternative_energies_resampled)
1665/42:
# Strip the 'Unnamed: 0' helper column from the resampled crude-oil frame and
# plot the remaining columns as lines.
# The drop mutates the frame in place; errors='ignore' makes a repeated run a
# no-op instead of a KeyError on the already-removed column.
corpus_crude_oil_resampled.drop(columns=['Unnamed: 0'], inplace=True, errors='ignore')
sns.set(rc={'figure.figsize': (15, 8)})
sns.lineplot(data=corpus_crude_oil_resampled)
1665/43: pip install streamlit
1665/44:
corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y/%m')
corpus_alternative_energies_resampled.index = pd.to_datetime(corpus_alternative_energies_resampled.index).strftime('%Y/%m')
corpus_crude_oil_resampled.index = pd.to_datetime(corpus_crude_oil_resampled.index).strftime('%Y/%m')
corpus_alternative_energies_resampled
1665/45:
import requests
import re
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB  
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
import argparse
from matplotlib import pyplot as plt
import wordcloud
import requests, json
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import seaborn as sns
1665/46:
corpus_oil_price=pd.read_csv("oil_price.csv", index_col=0)
corpus_alternative_energies=pd.read_csv("alternative_energies.csv")
corpus_crude_oil=pd.read_csv("crude_oil.csv")
corpus_oil_price
1665/47:
col_one_list = corpus_oil_price['content'].tolist()
col_one_list_2 = corpus_alternative_energies['content'].tolist()
col_one_list_3 = corpus_crude_oil['content'].tolist()
1665/48:
# reading the file and creating pos and neg list
for file in os.listdir('./'):
    if file.startswith('pos'):
        with open(file,mode='r',encoding = 'latin1') as fp:
            pos_list= fp.read().split()
    elif file.startswith('nega'):
        with open(file,mode='r',encoding='latin1') as fn:
             neg_list = fn.read().split()
1665/49:
sentiment_list = []
for i in col_one_list:
    result = 0
    pos_words = [word for word in i.split() if word in pos_list]
    neg_words = [word for word in i.split() if word in neg_list]
    if len(pos_words) > len(neg_words):
        result += 1
    elif len(pos_words) < len(neg_words):
        result += -1
    else:
        result += 0
    sentiment_list.append(result)
1665/50:
sentiment_list_2 = []
for i in col_one_list_2:
    result = 0
    pos_words = [word for word in i.split() if word in pos_list]
    neg_words = [word for word in i.split() if word in neg_list]
    if len(pos_words) > len(neg_words):
        result += 1
    elif len(pos_words) < len(neg_words):
        result += -1
    else:
        result += 0
    sentiment_list_2.append(result)
1665/51:
sentiment_list_3 = []
for i in col_one_list_3:
    result = 0
    pos_words = [word for word in i.split() if word in pos_list]
    neg_words = [word for word in i.split() if word in neg_list]
    if len(pos_words) > len(neg_words):
        result += 1
    elif len(pos_words) < len(neg_words):
        result += -1
    else:
        result += 0
    sentiment_list_3.append(result)
1665/52:
corpus_oil_price['baseline_sentiment'] = sentiment_list
corpus_alternative_energies['baseline_sentiment'] = sentiment_list_2
corpus_crude_oil['baseline_sentiment'] = sentiment_list_3
1665/53: s = SentimentIntensityAnalyzer()
1665/54: len(s.lexicon)
1665/55:
vader_result_list = []
for t in col_one_list:
    vader = s.polarity_scores(t)['compound']
    vader_result_list.append(vader)
1665/56:
vader_result_list_2 = []
for t in col_one_list_2:
    vader = s.polarity_scores(t)['compound']
    vader_result_list_2.append(vader)
1665/57:
vader_result_list_3 = []
for t in col_one_list_3:
    vader = s.polarity_scores(t)['compound']
    vader_result_list_3.append(vader)
1665/58:
corpus_oil_price['vader_sentiment'] = vader_result_list
corpus_alternative_energies['vader_sentiment'] = vader_result_list_2
corpus_crude_oil['vader_sentiment'] = vader_result_list_3
1665/59: corpus_oil_price
1665/60:
# Persist the three sentiment-scored corpora under data/.
# to_csv is a method and must be called with parentheses; subscripting it
# with square brackets raises TypeError and writes nothing.
corpus_oil_price.to_csv('data/corpus_oil_price.csv')
corpus_alternative_energies.to_csv('data/corpus_alternative_energies.csv')
corpus_crude_oil.to_csv('data/corpus_crude_oil.csv')
1665/61:
corpus_oil_price.to_csv('data/corpus_oil_price.csv')
corpus_alternative_energies.to_csv('data/corpus_alternative_energies.csv')
corpus_crude_oil.to_csv('data/corpus_crude_oil.csv')
1665/62:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"].dt.strftime('%Y-%m'), utc=True)
corpus_crude_oil
1665/63:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], utc=True)
corpus_crude_oil
1665/64:
corpus_oil_price_resampled = corpus_oil_price.resample('Y', on='date_published').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.resample('Y', on='date_published').mean()
corpus_crude_oil_resampled = corpus_crude_oil.resample('Y', on='date_published').mean()
1665/65:
corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y')
corpus_alternative_energies_resampled.index = pd.to_datetime(corpus_alternative_energies_resampled.index).strftime('%Y')
corpus_crude_oil_resampled.index = pd.to_datetime(corpus_crude_oil_resampled.index).strftime('%Y')
corpus_alternative_energies_resampled
1665/66:
corpus_oil_price=pd.read_csv("data/corpus_oil_price.csv", index_col=0)
corpus_alternative_energies=pd.read_csv("data/corpus_alternative_energies.csv", index_col=0)
corpus_crude_oil=pd.read_csv("data/corpus_crude_oil.csv", index_col=0)
1665/67:
corpus_oil_price["date_published"] = pd.to_datetime(corpus_oil_price["date_published"], utc=True)
corpus_alternative_energies["date_published"] = pd.to_datetime(corpus_alternative_energies["date_published"], utc=True)
corpus_crude_oil["date_published"] = pd.to_datetime(corpus_crude_oil["date_published"], utc=True)
corpus_crude_oil
1665/68:
# Collapse each corpus to one row per year holding the mean of its numeric
# columns. Non-numeric columns (article title / content) are filtered out
# explicitly rather than relying on the aggregation to skip them, which newer
# pandas versions no longer do.
corpus_oil_price_resampled = corpus_oil_price.set_index('date_published').select_dtypes(include='number').resample('Y').mean()
corpus_alternative_energies_resampled = corpus_alternative_energies.set_index('date_published').select_dtypes(include='number').resample('Y').mean()
corpus_crude_oil_resampled = corpus_crude_oil.set_index('date_published').select_dtypes(include='number').resample('Y').mean()
1665/69:
corpus_oil_price_resampled.index = pd.to_datetime(corpus_oil_price_resampled.index).strftime('%Y')
corpus_alternative_energies_resampled.index = pd.to_datetime(corpus_alternative_energies_resampled.index).strftime('%Y')
corpus_crude_oil_resampled.index = pd.to_datetime(corpus_crude_oil_resampled.index).strftime('%Y')
corpus_alternative_energies_resampled
1665/70: stats = pd.read_csv("data/stats.csv", index_col=0)
1665/71:
stats = pd.read_csv("data/stats.csv", index_col=0)
stats
1665/72:
stats = pd.read_csv("data/stats.csv", index_col=0)
stats
1665/73:
# Join each resampled sentiment frame with `stats` on "date_published".
# merge() returns a new DataFrame and none of the three results is bound to a
# name here, so the resampled frames themselves are left unchanged; in a
# notebook only the value of the last expression is displayed.
corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled.merge(stats, on="date_published")
1665/74:
df_oil_price = pd.concat([corpus_oil_price_resampled, stats], sort=False)
df_alternatives = pd.concat([corpus_alternative_energies_resampled, stats], sort=False)
df_crude_oil = pd.concat([corpus_crude_oil_resampled, stats], sort=False)
1665/75: df_crude_oil
1665/76: df_oil_price = corpus_oil_price_resampled.merge(stats, on="date_published")
1665/77:
# Stack each resampled sentiment frame on top of `stats`, keeping only the
# columns the two frames share (join="inner").
# The placeholder names df1 / df3 are not defined anywhere in the session, so
# each line now references the frame that matches its target variable.
df_oil_price = pd.concat([corpus_oil_price_resampled, stats], sort=False, join="inner")
df_alternatives = pd.concat([corpus_alternative_energies_resampled, stats], sort=False, join="inner")
df_crude_oil = pd.concat([corpus_crude_oil_resampled, stats], sort=False, join="inner")
1665/78:
df_oil_price = pd.concat([corpus_oil_price_resampled, stats], sort=False, join="inner")
df_alternatives = pd.concat([corpus_alternative_energies_resampled, stats], sort=False, join="inner")
df_crude_oil = pd.concat([corpus_crude_oil_resampled, stats], sort=False, join="inner")
1665/79:
df_oil_price = pd.concat([corpus_oil_price_resampled, stats], sort=False, join="inner")
df_alternatives = pd.concat([corpus_alternative_energies_resampled, stats], sort=False, join="inner")
df_crude_oil = pd.concat([corpus_crude_oil_resampled, stats], sort=False, join="inner")
df_crude_oil
1665/80:
# Attempt to combine each resampled sentiment frame with `stats` via
# pd.concat and display the crude-oil result.
# NOTE(review): `date_published` is used as a bare name and is not assigned in
# any visible cell, so evaluating `index=[date_published]` is expected to
# raise NameError; `index` is also not a documented pd.concat parameter -
# confirm. The intent looks like aligning the frames on their date index;
# verify whether a merge on "date_published" is what was wanted.
df_oil_price = pd.concat([corpus_oil_price_resampled, stats], sort=False, index=[date_published])
df_alternatives = pd.concat([corpus_alternative_energies_resampled, stats], sort=False, index=[date_published])
df_crude_oil = pd.concat([corpus_crude_oil_resampled, stats], sort=False, index=[date_published])
df_crude_oil
1665/81:
corpus_oil_price_resampled.to_csv('data/corpus_oil_price_2.csv')
corpus_alternative_energies_resampled.to_csv('data/corpus_alternative_energies_2.csv')
corpus_crude_oil_resampled.to_csv('data/corpus_crude_oil_2.csv')
1665/82:
corpus_oil_price_resampled=pd.read_csv("data/corpus_oil_price_2.csv", index_col=0)
corpus_alternative_energies_resampled=pd.read_csv("data/corpus_alternative_energies_2.csv", index_col=0)
corpus_crude_oil_resampled=pd.read_csv("data/corpus_crude_oil_2.csv", index_col=0)
1665/83:
stats = pd.read_csv("data/stats.csv", index_col=0)
stats
1665/84:
corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled.merge(stats, on="date_published")
1665/85:
corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled.merge(stats, on="date_published")
corpus_oil_price_resampled
1665/86:
corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled.merge(stats, on="date_published")
corpus_oil_price_resampled
1665/87: corpus_oil_price_resampled
1665/88:
corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled.merge(stats, on="date_published")
1665/89: corpus_oil_price_resampled
1665/90:
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
1665/91: corpus_oil_price_resampled_2
1665/92:
corpus_alternative_energies_resampled.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
sns.lineplot(data=corpus_oil_price_resampled)
1665/93:
corpus_alternative_energies_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
sns.lineplot(data=corpus_oil_price_resampled_2)
1665/94:
corpus_oil_price_resampled=pd.read_csv("data/corpus_oil_price_2.csv", index_col=0)
corpus_alternative_energies_resampled=pd.read_csv("data/corpus_alternative_energies_2.csv", index_col=0)
corpus_crude_oil_resampled=pd.read_csv("data/corpus_crude_oil_2.csv", index_col=0)
1665/95:
stats = pd.read_csv("data/stats.csv", index_col=0)
stats
1665/96:
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
1665/97:
corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
sns.lineplot(data=corpus_oil_price_resampled_2)
1665/98:
corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax2 = plt.twinx()
sns.lineplot(data=corpus_oil_price_resampled_2)
1665/99:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax2 = plt.twinx()
sns.lineplot(data=corpus_oil_price_resampled_2)
1665/100:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax2 = ax1.twinx()
sns.lineplot(data=corpus_oil_price_resampled_2)
1665/101:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax2 = plt.twinx()
sns.lineplot(data=corpus_oil_price_resampled_2)
1665/102:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax2=ax.twinx()
sns.lineplot(data=corpus_oil_price_resampled_2)
1665/103:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_oil_price_resampled_2.plot(x="date_published", y="vader_sentiment", legend=False)
ax2 = ax.twinx()
df.plot(x="date_published", y="$ money of the day", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/104:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
# Twin-axis chart for the merged oil-price frame: VADER sentiment on the left
# axis, the "$ money of the day" series in red on the right axis.
# Both series are read from corpus_oil_price_resampled_2; the previously used
# name `df` is not defined in this session.
sns.set(rc={'figure.figsize': (15, 8)})
ax = corpus_oil_price_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()  # second y-axis sharing the same x-axis
corpus_oil_price_resampled_2.plot(y="$ money of the day", ax=ax2, legend=False, color="r")
# Per-axes legends are disabled above; a single figure-level legend covers both lines.
ax.figure.legend()
plt.show()
1665/105:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_oil_price_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_oil_price_resampled_2.plot(y="$ money of the day", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/106:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_oil_price_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_oil_price_resampled_2.plot(y="$ 2020", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/107:
#corpus_oil_price_resampled_2.drop(['oil production', 'renewables'], axis=1, inplace=True)
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_alternative_energies_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_alternative_energies_resampled_2.plot(y="renewables", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/108:
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_crude_oil_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_crude_oil_resampled_2.plot(y="oil production", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/109:
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_crude_oil_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_crude_oil_resampled_2.plot(y="oil production", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/110:
corpus_oil_price_resampled=pd.read_csv("data/corpus_oil_price_2.csv", index_col=0)
corpus_alternative_energies_resampled=pd.read_csv("data/corpus_alternative_energies_2.csv", index_col=0)
corpus_crude_oil_resampled=pd.read_csv("data/corpus_crude_oil_2.csv", index_col=0)
1665/111:
stats = pd.read_csv("data/stats.csv", index_col=0)
stats
1665/112:
# Join each resampled sentiment frame with `stats` on "date_published" and
# remove the row labelled 2009 from every merged frame.
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
# DataFrame.drop returns a new frame, so the filtered result has to be bound
# back to the name; the bare drop() calls left the frames untouched.
# The index is compared as text so the 2009 row is matched whether the labels
# are strings or integers (the frames are reloaded from CSV, which may parse
# year labels as ints - TODO confirm the index dtype).
corpus_oil_price_resampled_2 = corpus_oil_price_resampled_2[corpus_oil_price_resampled_2.index.astype(str) != '2009']
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled_2[corpus_alternative_energies_resampled_2.index.astype(str) != '2009']
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled_2[corpus_crude_oil_resampled_2.index.astype(str) != '2009']
corpus_crude_oil_resampled_2
1665/113:
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
corpus_oil_price_resampled_2.drop('2009')
corpus_alternative_energies_resampled_2.drop('2009')
corpus_crude_oil_resampled_2.drop('2009')
1665/114:
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
corpus_oil_price_resampled_2.drop([0], axis=0, inplace=True)
corpus_alternative_energies_resampled_2.drop([0], axis=0, inplace=True)
corpus_crude_oil_resampled_2.drop([0], axis=0, inplace=True)
1665/115:
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
corpus_oil_price_resampled_2.drop([1], axis=0, inplace=True)
corpus_alternative_energies_resampled_2.drop([1], axis=0, inplace=True)
corpus_crude_oil_resampled_2.drop([1], axis=0, inplace=True)
1665/116:
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
corpus_oil_price_resampled_2.drop(0, axis=0, inplace=True)
corpus_alternative_energies_resampled_2.drop(0, axis=0, inplace=True)
corpus_crude_oil_resampled_2.drop(0, axis=0, inplace=True)
1665/117:
corpus_oil_price_resampled=pd.read_csv("data/corpus_oil_price_2.csv", index_col=0)
corpus_alternative_energies_resampled=pd.read_csv("data/corpus_alternative_energies_2.csv", index_col=0)
corpus_crude_oil_resampled=pd.read_csv("data/corpus_crude_oil_2.csv", index_col=0)
1665/118:
stats = pd.read_csv("data/stats.csv", index_col=0)
stats
1665/119:
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
corpus_oil_price_resampled_2.drop(0, axis=0, inplace=True)
corpus_alternative_energies_resampled_2.drop(0, axis=0, inplace=True)
corpus_crude_oil_resampled_2.drop(0, axis=0, inplace=True)
1665/120:
# Join every resampled sentiment frame with `stats` on "date_published", then
# discard the first row of each merged frame in place.
corpus_oil_price_resampled_2 = corpus_oil_price_resampled.merge(stats, on="date_published")
corpus_alternative_energies_resampled_2 = corpus_alternative_energies_resampled.merge(stats, on="date_published")
corpus_crude_oil_resampled_2 = corpus_crude_oil_resampled.merge(stats, on="date_published")
for merged in (
    corpus_oil_price_resampled_2,
    corpus_alternative_energies_resampled_2,
    corpus_crude_oil_resampled_2,
):
    # index[:1] holds the label of the first row; drop removes it in place.
    merged.drop(merged.index[:1], inplace=True)
1665/121: corpus_oil_price_resampled_2
1665/122:
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_oil_price_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_oil_price_resampled_2.plot(y="$ 2020", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/123:
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_alternative_energies_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_alternative_energies_resampled_2.plot(y="renewables", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/124:
sns.set(rc = {'figure.figsize':(15,8)})
ax = corpus_crude_oil_resampled_2.plot(y="vader_sentiment", legend=False)
ax2 = ax.twinx()
corpus_crude_oil_resampled_2.plot(y="oil production", ax=ax2, legend=False, color="r")
ax.figure.legend()
plt.show()
1665/125:
# Oil-price corpus: VADER sentiment (left axis) against the "$ 2020" series
# (right axis, red) on a 12x6 figure with one shared, figure-level legend.
sns.set(rc={'figure.figsize': (12, 6)})
ax = corpus_oil_price_resampled_2.plot(legend=False, y="vader_sentiment")
ax2 = ax.twinx()  # right-hand y-axis on the same x-axis
corpus_oil_price_resampled_2.plot(ax=ax2, y="$ 2020", color="r", legend=False)
ax.get_figure().legend()
plt.show()
1665/126:
# Alternative-energies corpus: VADER sentiment (left axis) against the
# "renewables" series (right axis, red) on a 12x6 figure.
sns.set(rc={'figure.figsize': (12, 6)})
ax = corpus_alternative_energies_resampled_2.plot(legend=False, y="vader_sentiment")
ax2 = ax.twinx()  # right-hand y-axis on the same x-axis
corpus_alternative_energies_resampled_2.plot(ax=ax2, y="renewables", color="r", legend=False)
ax.get_figure().legend()
plt.show()
1665/127:
# Crude-oil corpus: VADER sentiment (left axis) against the "oil production"
# series (right axis, red) on a 12x6 figure.
sns.set(rc={'figure.figsize': (12, 6)})
ax = corpus_crude_oil_resampled_2.plot(legend=False, y="vader_sentiment")
ax2 = ax.twinx()  # right-hand y-axis on the same x-axis
corpus_crude_oil_resampled_2.plot(ax=ax2, y="oil production", color="r", legend=False)
ax.get_figure().legend()
plt.show()
1666/1:
corpus_oil_price=pd.read_csv('data/corpus_oil_price.csv', index_col=0)
corpus_alternative_energies=pd.read_csv('data/corpus_alternative_energies.csv', index_col=0)
corpus_crude_oil=pd.read_csv('data/corpus_crude_oil.csv', index_col=0)
corpus_oil_price