In [104]:
# All the libraries we are using:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup


In [105]:
# Load the cleaned dataset into a dataframe.
# The file was saved with its index as an extra, unnamed first column ('Unnamed: 0');
# saving it with index=False upstream would have avoided that column.
# The last cell of this notebook writes back to this same path with index=False, so on a
# re-run the 'Unnamed: 0' column no longer exists and asking for it as index_col would
# fail. Read only the header first and use that column as the index only when present.
CLEANED_CSV = '../Data/cleaned_mes.csv'
csv_columns = pd.read_csv(CLEANED_CSV, nrows=0).columns
df = pd.read_csv(CLEANED_CSV, index_col='Unnamed: 0' if 'Unnamed: 0' in csv_columns else None)

In [106]:
# The analysis focuses on price, so two new columns will be created: one with the consume
# per unit of distance and another one with a price.
# There is no data about the dates on which these services were delivered, so it is not
# possible to infer how much it cost to deliver them at the time. Instead, today's
# (17/03/2022) prices of these gas types are used: SP98 1.955€/l, E10 1.825€/l
# Source: https://www.dieselogasolina.com/
df.sample(n=10)

Unnamed: 0,distance,consume,speed,temp_inside,temp_outside,gas_type,AC,rain,sun,snow,temp_gradient
313,11.3,4.3,38,22.0,17,SP98,False,False,False,False,-5.0
339,15.4,4.1,45,22.0,24,E10,False,False,False,False,2.0
298,16.3,4.5,58,22.0,16,SP98,False,False,False,False,-6.0
70,12.3,5.2,55,21.5,12,SP98,False,False,False,False,-9.5
279,24.7,4.5,26,22.0,10,SP98,False,False,False,False,-12.0
100,5.3,4.1,34,21.5,9,SP98,False,False,False,False,-12.5
187,12.3,4.8,41,22.5,7,E10,False,False,False,False,-15.5
179,16.2,5.2,29,21.0,0,E10,False,False,False,False,-21.0
90,11.8,4.3,37,20.0,6,SP98,False,False,False,False,-14.0
309,31.9,4.3,33,22.0,16,SP98,False,False,False,False,-6.0


In [107]:
# I have also written a small function that fetches the current prices from the website; it may be useful for future uses of this code.
def find_price():
    """Scrape the current SP98 and E10 prices from https://www.dieselogasolina.com/.

    The prices are read from the second cell of rows 1 (E10) and 2 (SP98) of the
    first table on the page, keeping the first five characters of each cell and
    using '.' instead of ',' as the decimal separator.
    NOTE(review): this depends on the current layout of the page — re-check it if
    the site changes.

    Returns:
        tuple: (price_SP98, price_E10) as floats (presumably in €/l — confirm on the site).

    Raises:
        requests.RequestException: if the request fails, times out or the server
            answers with an HTTP error status.
    """
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    page = requests.get('https://www.dieselogasolina.com/', timeout=10)
    # Fail here with a clear HTTP error instead of trying to parse an error page.
    page.raise_for_status()
    soup = BeautifulSoup(page.content, 'html.parser')
    rows = soup.find('table').find_all('tr')
    p_E10 = rows[1].find_all('td')[1].text
    p_SP98 = rows[2].find_all('td')[1].text
    return float(p_SP98[:5].replace(',', '.')), float(p_E10[:5].replace(',', '.'))

In [108]:
# Let's start by creating a function to transform the dataframe:
def df_trans(dis, con, gt, prices=None):
    """Compute the consume and the fuel price per unit of distance for one row.

    Meant to be applied row-wise to create two new columns:

        df[['con_dis', 'price_dis']] = df.apply(
            lambda x: df_trans(x.distance, x.consume, x.gas_type, prices=[...]), axis=1)

    A total-price column would be easy to add as well (distance * price per distance).

    Args:
        dis: distance of the row.
        con: consume of the row.
        gt: gas type of the row, either 'SP98' or 'E10'.
        prices: sequence ``[price_SP98, price_E10]``. When omitted, the prices
            returned by ``find_price()`` are used; they are fetched on the first
            such call only and reused afterwards.

    Returns:
        pd.Series with 'con_dis' (``con / dis``) and 'price_dis' (the price of
        ``gt`` multiplied by ``con_dis``).

    Raises:
        KeyError: if ``gt`` is neither 'SP98' nor 'E10'.
    """
    if prices is None:
        # A default of `prices=find_price()` would be evaluated when the function is
        # defined (one HTTP request just for running the cell). Resolve it lazily
        # instead, and cache it on the function so that a row-wise apply does not
        # issue one request per row.
        cached = getattr(df_trans, '_cached_prices', None)
        if cached is None:
            cached = df_trans._cached_prices = find_price()
        prices = cached

    # The consume per distance needs no transformation of the inputs.
    cpd = con / dis

    price = {'SP98': prices[0], 'E10': prices[1]}

    # Price per distance: price of the gas type used times the consume per distance.
    ppd = price[gt] * cpd

    return pd.Series({'con_dis': cpd, 'price_dis': ppd})

In [109]:
# Fetch the current prices once; find_price() returns them as (SP98, E10).
fp = find_price()
# Rows with gas type 'SP98' get the SP98 price; every other row gets the E10 price.
df['gas_price'] = [fp[0] if fuel == 'SP98' else fp[1] for fuel in df.gas_type]

In [110]:
# Pass the prices fetched into `fp` explicitly so that 'price_dis' is computed from the
# same values as the 'gas_price' column, instead of from df_trans' own default prices.
df[['con_dis', 'price_dis']] = df.apply(
    lambda x: df_trans(x.distance, x.consume, x.gas_type, prices=fp), axis=1)

In [111]:
# The dataframe is now transformed: save it so it can be used from another notebook.
# Note: this is the same path the data was loaded from, so the input file is overwritten
# (and the index is not written back).
df.to_csv(path_or_buf='../Data/cleaned_mes.csv', index=False)