# NLP with oil and renewable energies project

#### Get libraries

In [225]:
import requests
import re
from bs4 import BeautifulSoup
from collections import Counter
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB  
from imblearn.over_sampling import SMOTE
from imblearn.under_sampling import RandomUnderSampler, NearMiss
import numpy as np
from PIL import Image
from sklearn.linear_model import LogisticRegression
import argparse
from matplotlib import pyplot as plt
import wordcloud
import json

## Extraction procedure for the "Oil prices" section

#### For lop that recreates web site urls

In [198]:
numbers = list(range(2, 112))
url_list = []
for i in (numbers):
    str_a = 'https://oilprice.com/Energy/Oil-Prices/Page-'
    str_b = '.html'
    complete_url = str_a + str(i) + str_b
    url_list.append(complete_url)       

#### For loops that download pages with links

In [199]:
for j in url_list:
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    page = requests.get(j, headers=headers)
    response_a = requests.get(j)
    with open('pages/'+'oil_price'+str(url_list.index(j))+'.html', 'w', encoding="utf-8") as file: 
            file.write(response_a.text)   

In [200]:
missing = ['https://oilprice.com/Energy/Oil-Prices']
for j in missing:
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    page = requests.get(j, headers=headers)
    response_a = requests.get(j)
    with open('pages/'+'oil_price_a'+str(missing.index(j))+'.html', 'w', encoding="utf-8") as file: 
            file.write(response_a.text)   

#### For loops that filter all urls out of the pages and appends them to a list

In [201]:
link_list = []
for j in url_list:
    text = open('pages/oil_price'+str(url_list.index(j))+'.html', encoding="utf-8").read()
    soup = BeautifulSoup(text, 'html.parser')
    for a in soup.find_all('a', href=True): 
        if a.text: 
            link_list.append(a['href'])

In [202]:
for j in missing:
    text = open('pages/oil_price'+str(missing.index(j))+'.html', encoding="utf-8").read()
    soup = BeautifulSoup(text, 'html.parser')
    for a in soup.find_all('a', href=True): 
        if a.text: 
            link_list.append(a['href'])

In [203]:
link_list

['https://oilprice.com/',
 'https://oilprice.com/',
 'https://oilprice.com/oil-price-charts/',
 'https://oilprice.com/rig-count',
 'https://oilprice.com/Energy/',
 'https://oilprice.com/Energy/Energy-General/',
 'https://oilprice.com/Energy/Oil-Prices/',
 'https://oilprice.com/Energy/Crude-Oil/',
 'https://oilprice.com/Energy/Heating-Oil/',
 'https://oilprice.com/Energy/Gas-Prices/',
 'https://oilprice.com/Energy/Natural-Gas/',
 'https://oilprice.com/Energy/Coal/',
 'https://oilprice.com/oilcompanynews',
 'https://oilprice.com/Interviews',
 'https://oilprice.com/Alternative-Energy/',
 'https://oilprice.com/Alternative-Energy/Nuclear-Power/',
 'https://oilprice.com/Alternative-Energy/Solar-Energy/',
 'https://oilprice.com/Alternative-Energy/Hydroelectric/',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/',
 'https://oilprice.com/Alternative-Energy/Geothermal-Energy/',
 'https://oilprice.com/Alternative-Energy/Wind-Power/',
 'https://oilprice.com/Alternative-Energy/Fuel-Cells

#### Some filtering and transformation steps 

In [204]:
df = pd.DataFrame(link_list, columns =['links'])
filter_lists=df[df["links"].str.contains("https://oilprice.com/Energy/Oil-Prices/")]
filter_lists2 = filter_lists.drop_duplicates(subset = ["links"])
filter_lists3 = filter_lists2[~filter_lists2.links.str.contains("Page-")]
filter_lists3.drop(6, inplace=True)
col_one_list = filter_lists3['links'].tolist()
col_one_list

['https://oilprice.com/Energy/Oil-Prices/Oil-Prices-Rise-After-Six-Week-Losing-Streak.html',
 'https://oilprice.com/Energy/Oil-Prices/Jefferies-Fully-Reopened-World-Could-See-150-Oil.html',
 'https://oilprice.com/Energy/Oil-Prices/Oil-Set-For-Third-Consecutive-Week-Of-Losses.html',
 'https://oilprice.com/Energy/Oil-Prices/The-Double-Irony-Of-Bidens-Plea-To-OPEC.html',
 'https://oilprice.com/Energy/Oil-Prices/The-Oil-Price-Rally-Is-Far-From-Over.html',
 'https://oilprice.com/Energy/Oil-Prices/Biden-Threatens-OPEC-With-Undisclosed-Tools.html',
 'https://oilprice.com/Energy/Oil-Prices/Citi-Oil-Will-Continue-Rising-This-Quarter.html',
 'https://oilprice.com/Energy/Oil-Prices/Oil-Rally-Reverses-On-Signs-Of-Cooling-Demand.html',
 'https://oilprice.com/Energy/Oil-Prices/Oil-Prices-Slide-On-Fears-Of-Tighter-Fed-Policy.html',
 'https://oilprice.com/Energy/Oil-Prices/Why-Oil-Prices-Will-Rise-In-The-Short-Term.html',
 'https://oilprice.com/Energy/Oil-Prices/Only-OPEC-Has-The-Power-To-Send-Oil-Pri

#### For loop that opens all generated urls and stores their content into separate files

In [205]:
#Because this step takes a lot of time to be done, it is commented out
#for j in col_one_list:
#    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#    page = requests.get(j, headers=headers)
#    response_a = requests.get(j)
#    with open('oil-price/'+'oil_price'+str(col_one_list.index(j))+'.html', 'w', encoding="utf-8") as file: 
#            file.write(response_a.text)   

## Extraction procedure for the "alternative energies" section

#### For lop that recreates web site urls

In [206]:
numbers_2 = list(range(2, 120))
url_list_2 = []
for i in (numbers_2):
    str_c = 'https://oilprice.com/Alternative-Energy/Page-'
    str_d = '.html'
    complete_url_2 = str_c + str(i) + str_d
    url_list_2.append(complete_url_2)       

#### For loops that download pages with links

In [207]:
for j in url_list_2:
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    page = requests.get(j, headers=headers)
    response_a = requests.get(j)
    with open('pages/'+'alternative_energy'+str(url_list_2.index(j))+'.html', 'w', encoding="utf-8") as file: 
            file.write(response_a.text)   

In [208]:
missing2 = ['https://oilprice.com/Alternative-Energy/']
for j in missing2:
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    page = requests.get(j, headers=headers)
    response_a = requests.get(j)
    with open('pages/'+'alternative_energy_a'+str(missing2.index(j))+'.html', 'w', encoding="utf-8") as file: 
            file.write(response_a.text)   

#### For loops that filter all urls out of the pages and appends them to a list

In [209]:
link_list_2 = []
for j in url_list_2:
    text = open('pages/alternative_energy'+str(url_list_2.index(j))+'.html', encoding="utf-8").read()
    soup = BeautifulSoup(text, 'html.parser')
    for a in soup.find_all('a', href=True): 
        if a.text: 
            link_list_2.append(a['href'])

In [210]:
for j in missing2:
    text = open('pages/alternative_energy'+str(missing2.index(j))+'.html', encoding="utf-8").read()
    soup = BeautifulSoup(text, 'html.parser')
    for a in soup.find_all('a', href=True): 
        if a.text: 
            link_list_2.append(a['href'])

In [211]:
link_list_2

['https://oilprice.com/',
 'https://oilprice.com/',
 'https://oilprice.com/oil-price-charts/',
 'https://oilprice.com/rig-count',
 'https://oilprice.com/Energy/',
 'https://oilprice.com/Energy/Energy-General/',
 'https://oilprice.com/Energy/Oil-Prices/',
 'https://oilprice.com/Energy/Crude-Oil/',
 'https://oilprice.com/Energy/Heating-Oil/',
 'https://oilprice.com/Energy/Gas-Prices/',
 'https://oilprice.com/Energy/Natural-Gas/',
 'https://oilprice.com/Energy/Coal/',
 'https://oilprice.com/oilcompanynews',
 'https://oilprice.com/Interviews',
 'https://oilprice.com/Alternative-Energy/',
 'https://oilprice.com/Alternative-Energy/Nuclear-Power/',
 'https://oilprice.com/Alternative-Energy/Solar-Energy/',
 'https://oilprice.com/Alternative-Energy/Hydroelectric/',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/',
 'https://oilprice.com/Alternative-Energy/Geothermal-Energy/',
 'https://oilprice.com/Alternative-Energy/Wind-Power/',
 'https://oilprice.com/Alternative-Energy/Fuel-Cells

#### Some filtering and transformation steps 

In [212]:
df_2 = pd.DataFrame(link_list_2, columns =['links'])
filter_lists_b=df_2[df_2["links"].str.contains("https://oilprice.com/Alternative-Energy/")]
filter_lists_b_2 = filter_lists_b.drop_duplicates(subset = ["links"])

In [213]:
filter_lists_b_3 = filter_lists_b_2[~filter_lists_b_2.links.str.contains("Page-")]
filter_lists_b_3.drop(index=[14, 15, 16, 17, 18, 19, 20, 21, 22, 23], inplace=True)
filter_lists_b_3 = filter_lists_b_3[~filter_lists_b_3.links.str.contains("https://oilprice.com/Alternative-Energy/Nuclear-Power/")]
#filter_lists_b_3.to_csv("out2.csv")
col_one_list_2 = filter_lists_b_3['links'].tolist()
col_one_list_2

['https://oilprice.com/Alternative-Energy/Renewable-Energy/Could-Greenflation-Derail-The-Commodity-Supercycle.html',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/Southeast-Asia-Needs-2-Trillion-To-Realize-Its-Green-Economy-Ambitions.html',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/IEA-Chief-Dont-Blame-Renewables-For-Europes-Energy-Crunch.html',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/Chinas-Control-Over-Key-Battery-Metals-Should-Worry-The-US.html',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/Why-Chevron-And-Exxon-Shun-Solar-And-Wind.html',
 'https://oilprice.com/Alternative-Energy/Solar-Energy/The-US-Is-Set-To-Break-Another-Solar-Record-Despite-Rising-Costs.html',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/Democrats-Divided-On-Bidens-35-Trillion-Green-Energy-Plan.html',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/Middle-East-Oil-Producers-Make-Surprising-Pivot-Towards-Renewables.html',
 'htt

#### For loop that opens all generated urls and stores their content into separate files

In [214]:
#Because this step takes a lot of time to be done, it is commented out
#for j in col_one_list_2:
#    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#    page = requests.get(j, headers=headers)
#    response_a = requests.get(j)
#    with open('alternative-energy/'+'alternative_energy'+str(col_one_list_2.index(j))+'.html', 'w', encoding="utf-8") as file: 
#            file.write(response_a.text)   

## Extraction procedure for the "crude oil" section

#### For lop that recreates web site urls

In [215]:
numbers_3 = list(range(2, 252))
url_list_3 = []
for i in (numbers_3):
    str_e = 'https://oilprice.com/Energy/Crude-Oil/Page-'
    str_f = '.html'
    complete_url_3 = str_e + str(i) + str_f
    url_list_3.append(complete_url_3)      

#### For loops that download pages with links

In [216]:
for j in url_list_3:
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    page = requests.get(j, headers=headers)
    response_a = requests.get(j)
    with open('pages/'+'crude_oil'+str(url_list_3.index(j))+'.html', 'w', encoding="utf-8") as file: 
            file.write(response_a.text)   

In [217]:
missing3 = ['https://oilprice.com/Alternative-Energy/']
for j in missing3:
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    page = requests.get(j, headers=headers)
    response_a = requests.get(j)
    with open('pages/'+'crude_oil_a'+str(missing3.index(j))+'.html', 'w', encoding="utf-8") as file: 
            file.write(response_a.text)   

#### For loops that filter all urls out of the pages and appends them to a list

In [218]:
link_list_3 = []
for j in url_list_3:
    text = open('pages/crude_oil'+str(url_list_3.index(j))+'.html', encoding="utf-8").read()
    soup = BeautifulSoup(text, 'html.parser')
    for a in soup.find_all('a', href=True): 
        if a.text: 
            link_list_3.append(a['href'])

In [219]:
for j in missing3:
    text = open('pages/crude_oil'+str(missing3.index(j))+'.html', encoding="utf-8").read()
    soup = BeautifulSoup(text, 'html.parser')
    for a in soup.find_all('a', href=True): 
        if a.text: 
            link_list_3.append(a['href'])

In [220]:
link_list_3

['https://oilprice.com/',
 'https://oilprice.com/',
 'https://oilprice.com/oil-price-charts/',
 'https://oilprice.com/rig-count',
 'https://oilprice.com/Energy/',
 'https://oilprice.com/Energy/Energy-General/',
 'https://oilprice.com/Energy/Oil-Prices/',
 'https://oilprice.com/Energy/Crude-Oil/',
 'https://oilprice.com/Energy/Heating-Oil/',
 'https://oilprice.com/Energy/Gas-Prices/',
 'https://oilprice.com/Energy/Natural-Gas/',
 'https://oilprice.com/Energy/Coal/',
 'https://oilprice.com/oilcompanynews',
 'https://oilprice.com/Interviews',
 'https://oilprice.com/Alternative-Energy/',
 'https://oilprice.com/Alternative-Energy/Nuclear-Power/',
 'https://oilprice.com/Alternative-Energy/Solar-Energy/',
 'https://oilprice.com/Alternative-Energy/Hydroelectric/',
 'https://oilprice.com/Alternative-Energy/Renewable-Energy/',
 'https://oilprice.com/Alternative-Energy/Geothermal-Energy/',
 'https://oilprice.com/Alternative-Energy/Wind-Power/',
 'https://oilprice.com/Alternative-Energy/Fuel-Cells

#### Some filtering and transformation steps 

In [221]:
df_3 = pd.DataFrame(link_list_3, columns =['links'])
filter_lists_c=df_3[df_3["links"].str.contains("https://oilprice.com/Energy/Crude-Oil/")]
filter_lists_b_3 = filter_lists_c.drop_duplicates(subset = ["links"])
filter_lists_b_3

Unnamed: 0,links
7,https://oilprice.com/Energy/Crude-Oil/
73,https://oilprice.com/Energy/Crude-Oil/Aramco-C...
74,https://oilprice.com/Energy/Crude-Oil/US-Rig-C...
76,https://oilprice.com/Energy/Crude-Oil/Canadas-...
78,https://oilprice.com/Energy/Crude-Oil/The-Real...
...,...
36683,https://oilprice.com/Energy/Crude-Oil/A-Detail...
36685,https://oilprice.com/Energy/Crude-Oil/What-Is-...
36687,https://oilprice.com/Energy/Crude-Oil/Oil-Shal...
36689,https://oilprice.com/Energy/Crude-Oil/Oil-Is-N...


In [222]:
#filter_lists_b_3 = filter_lists_b_3[~filter_lists_b_3.links.str.contains("Page-")]
#filter_lists_b_3.drop(index=[7], inplace=True)
#filter_lists_b_3.to_csv("out3.csv")
col_one_list_3 = filter_lists_b_3['links'].tolist()
col_one_list_3

['https://oilprice.com/Energy/Crude-Oil/',
 'https://oilprice.com/Energy/Crude-Oil/Aramco-CEO-Rushed-Energy-Transition-Could-Spark-Social-Unrest.html',
 'https://oilprice.com/Energy/Crude-Oil/US-Rig-Count-Unchanged-After-Crazy-Week-In-Oil-Markets.html',
 'https://oilprice.com/Energy/Crude-Oil/Canadas-Ambitious-New-Plan-To-Save-Its-Oil-Sands.html',
 'https://oilprice.com/Energy/Crude-Oil/The-Real-Reason-Why-OPEC-Wont-Open-The-Taps.html',
 'https://oilprice.com/Energy/Crude-Oil/Oil-Bulls-Rejoice-As-Bidens-Supply-Strategy-Backfires.html',
 'https://oilprice.com/Energy/Crude-Oil/UAE-Pumps-6-Billion-Into-Oil-And-Gas-Expansion-Projects.html',
 'https://oilprice.com/Energy/Crude-Oil/China-Keeps-Markets-In-The-Dark-About-SPR-Release.html',
 'https://oilprice.com/Energy/Crude-Oil/Oil-Markets-Unimpressed-By-Small-Inventory-Build.html',
 'https://oilprice.com/Energy/Crude-Oil/SPR-Release-Only-Triggered-A-Brief-Selloff-In-Crude-Oil.html',
 'https://oilprice.com/Energy/Crude-Oil/Biden-Administratio

#### For loop that opens all generated urls and stores their content into separate files

In [182]:
#Because this step takes a lot of time to be done, it is commented out
#for j in col_one_list_3:
#    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
#    page = requests.get(j, headers=headers)
#    response_a = requests.get(j)
#    with open('crude-oil/'+'crude_oil'+str(col_one_list_3.index(j))+'.html', 'w', encoding="utf-8") as file: 
#            file.write(response_a.text)   

## Gathering text from the extracted files for all three sections

### Oil prices

In [237]:
titles = []
dates = []
contents = []
for z in col_one_list:
    raw_text = open('oil-price/oil_price'+str(col_one_list.index(z))+'.html', encoding="utf-8").read()
    text_soup = BeautifulSoup(raw_text)
    text = text_soup.find('script', type='application/ld+json')
    json_object = json.loads(text.contents[0])
    title=(json_object['name'])
    datePublished=(json_object['datePublished'])
    content=(json_object['articleBody'])
    titles.append(title)
    dates.append(datePublished)
    contents.append(content)

FileNotFoundError: [Errno 2] No such file or directory: 'oil-price/oil_price2188.html'

In [239]:
oil_price_df = pd.DataFrame(
    {'title': titles,
     'date_published': dates,
     'content': contents
    })
oil_price_df

Unnamed: 0,title,date_published,content
0,$70 Oil Won't Keep OPEC Happy,2021-11-29T19:00:00-06:00,One day&mdash;that was all it took oil prices ...
1,Why This Oil Price Slump May Not Be Bad News F...,2021-12-02T19:00:00-06:00,Crude oil prices are plunging under the weight...
2,The Double Irony Of Biden's Plea To OPEC,2021-11-10T13:00:00-06:00,As I wrote in February of this year in&nbsp;Th...
3,The Oil Price Rally Is Far From Over,2021-11-09T19:00:00-06:00,How much higher could oil prices go This is th...
4,"Biden Threatens OPEC With Undisclosed ""Tools""",2021-11-08T10:00:00-06:00,"""There are other tools in the arsenal that we ..."
...,...,...,...
2183,A Disobliging Update on the Price of Oil,2010-01-09T16:21:21-06:00,There still seems to be a deplorable uncertain...
2184,What Happens When the Wells Run Dry,2010-01-08T16:55:39-06:00,One nagging question that the industrial world...
2185,Petro Unfriendly California Changing it's Stan...,2009-12-30T14:18:05-06:00,The financial crisis changed the way people th...
2186,New Techniques Oil Companies are Using in Dril...,2009-11-25T16:35:34-06:00,As the politics and philosophical arguments ab...


### Alternative energy

In [240]:
titles2 = []
dates2 = []
contents2 = []
for z in col_one_list_2:
    raw_text = open('alternative-energy/alternative_energy'+str(col_one_list_2.index(z))+'.html', encoding="utf-8").read()
    text_soup = BeautifulSoup(raw_text)
    text = text_soup.find('script', type='application/ld+json')
    json_object = json.loads(text.contents[0])
    title=(json_object['name'])
    datePublished=(json_object['datePublished'])
    content=(json_object['articleBody'])
    titles2.append(title)
    dates2.append(datePublished)
    contents2.append(content)

In [242]:
alternative_energies_df = pd.DataFrame(
    {'title': titles2,
     'date_published': dates2,
     'content': contents2
    })
alternative_energies_df

Unnamed: 0,title,date_published,content
0,Could Greenflation Derail The Commodity Superc...,2021-11-30T16:00:00-06:00,"The energy transition is in full swing, with t..."
1,Southeast Asia Needs $2 Trillion To Realize It...,2021-10-07T14:00:00-05:00,Investing $2 trillion over the next decade in ...
2,IEA Chief Don't Blame Renewables For Europe's ...,2021-09-28T11:00:00-05:00,The energy squeeze in Europe has nothing to do...
3,China's Control Over Key Battery Metals Should...,2021-09-26T12:00:00-05:00,The writing is on the wall for the fossil fuel...
4,Why Chevron And Exxon Shun Solar And Wind,2021-09-26T16:00:00-05:00,The world's largest oil companies are under gr...
...,...,...,...
1826,Wind Power - A Renewable That's Actually Deliv...,2009-07-24T07:48:57-05:00,Unlike it's fellow alternatives such as Solar ...
1827,Fuel Cells - An Environmental Saviour or Just ...,2009-07-24T07:48:57-05:00,A fuel cell is basically an engine that produc...
1828,Hydroelectric Energy Explained,2009-07-24T07:48:00-05:00,What is hydroelectric energy Hydroelectricity...
1829,How Does Tidal Energy Work,2009-07-24T07:48:57-05:00,How Does Tidal Energy Work Tidal energy is ene...


### Crude oil

In [243]:
titles3 = []
dates3 = []
contents3 = []
for z in col_one_list_3:
    raw_text = open('crude-oil/crude_oil'+str(col_one_list_3.index(z))+'.html', encoding="utf-8").read()
    text_soup = BeautifulSoup(raw_text)
    text = text_soup.find('script', type='application/ld+json')
    json_object = json.loads(text.contents[0])
    title=(json_object['name'])
    datePublished=(json_object['datePublished'])
    content=(json_object['articleBody'])
    titles3.append(title)
    dates3.append(datePublished)
    contents3.append(content)

FileNotFoundError: [Errno 2] No such file or directory: 'crude-oil/crude_oil5005.html'

In [244]:
crude_oil_df = pd.DataFrame(
    {'title': titles3,
     'date_published': dates3,
     'content': contents3
    })
crude_oil_df

Unnamed: 0,title,date_published,content
0,Oil Bulls Rejoice As Biden's Supply Strategy B...,2021-11-24T18:00:00-06:00,President Biden&rsquo;s threat to oil producer...
1,"OPEC To Add 400,000 Bpd In January Despite Oil...",2021-12-02T08:53:55-06:00,The OPEC group is sticking to its plan to ease...
2,UAE Pumps $6 Billion Into Oil And Gas Expansio...,2021-11-24T17:00:00-06:00,The U.S.-sponsored &lsquo;relationship normali...
3,China Keeps Markets In The Dark About SPR Release,2021-11-24T16:00:00-06:00,The volume of the expected Chinese release of ...
4,Oil Markets Unimpressed By Small Crude Invento...,2021-11-24T09:36:00-06:00,"Crude oil inventories rose last week, while ga..."
...,...,...,...
5000,A Detailed Guide on the Many Different Types o...,2009-12-02T17:12:01-06:00,Some people arbitrarily speak about oil as if ...
5001,What is Peak Oil Theory A Thorough Look at Thi...,2009-10-21T16:17:44-05:00,Currently there is a lot of debate going on re...
5002,Oil Shale - So Just What Is It,2009-09-24T17:26:55-05:00,People often say &ldquo;You can&rsquo;t squeez...
5003,Oil is not a Villain Here's what it's done for us,2009-09-21T16:45:02-05:00,"For decades now, oil has often been vilified a..."


### Saving data into csvs

In [245]:
oil_price_df.to_csv("oil_price.csv")
alternative_energies_df.to_csv("alternative_energies.csv")
crude_oil_df.to_csv("crude_oil.csv")