<a href="https://colab.research.google.com/github/binmuji/petrol-consumption-world/blob/main/EDA_Petrol_World.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup Library & Dataset

## Import Library

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px

## Import Dataset

In [2]:
petrol_world = pd.read_csv("Petrol Dataset Version 2.csv", encoding="latin-1")

## Quick Look

### View Row x Colomn

In [3]:
petrol_world.shape

(181, 11)

### View Sample Data

In [4]:
petrol_world.head(10)

Unnamed: 0,S#,Country,Daily Oil Consumption (Barrels),World Share,Yearly Gallons Per Capita,Price Per Gallon (USD),Price Per Liter (USD),Price Per Liter (PKR),GDP Per Capita ( USD ),Gallons GDP Per Capita Can Buy,xTimes Yearly Gallons Per Capita Buy
0,1,United States,19687287,20%,934.3,5.19,1.37,289.97,63414,12218,13
1,2,China,12791553,13%,138.7,5.42,1.43,302.87,10435,1925,14
2,3,India,4443000,5%,51.4,5.05,1.33,281.93,1901,376,7
3,4,Japan,4012877,4%,481.5,4.69,1.24,262.05,40193,8570,18
4,5,Russia,3631287,4%,383.2,3.41,0.9,190.56,10127,2970,8
5,6,Saudi Arabia,3302000,3%,1560.2,2.35,0.62,131.34,20110,8557,5
6,7,Brazil,2984000,3%,221.9,5.36,1.42,299.27,6797,1268,6
7,8,South Korea,2605440,3%,783.4,6.09,1.61,340.52,31632,5194,7
8,9,Canada,2486301,3%,1047.6,6.76,1.79,377.74,43258,6399,6
9,10,Germany,2383393,3%,444.5,7.65,2.02,427.44,46208,6040,14


### View Data Types of Colomn

In [5]:
petrol_world.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 181 entries, 0 to 180
Data columns (total 11 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   S#                                    181 non-null    int64  
 1   Country                               181 non-null    object 
 2   Daily Oil Consumption (Barrels)       181 non-null    object 
 3   World Share                           181 non-null    object 
 4   Yearly Gallons Per Capita             181 non-null    float64
 5   Price Per Gallon (USD)                181 non-null    float64
 6   Price Per Liter (USD)                 181 non-null    float64
 7   Price Per Liter (PKR)                 181 non-null    float64
 8   GDP Per Capita ( USD )                181 non-null    object 
 9   Gallons GDP Per Capita Can Buy        181 non-null    object 
 10  xTimes Yearly Gallons Per Capita Buy  181 non-null    int64  
dtypes: float64(4), int6

# Data Processing

## Missing Value

In [6]:
petrol_world.isnull().sum()

S#                                      0
Country                                 0
Daily Oil Consumption (Barrels)         0
World Share                             0
Yearly Gallons Per Capita               0
Price Per Gallon (USD)                  0
Price Per Liter (USD)                   0
Price Per Liter (PKR)                   0
GDP Per Capita ( USD )                  0
Gallons GDP Per Capita Can Buy          0
xTimes Yearly Gallons Per Capita Buy    0
dtype: int64

## Duplicate Value

In [7]:
petrol_world.duplicated().sum()

0

## Chande Data Types

In [8]:
petrol_world["Daily Oil Consumption (Barrels)"]=petrol_world["Daily Oil Consumption (Barrels)"].apply(lambda x: x.replace(',',''))
petrol_world["Daily Oil Consumption (Barrels)"]=petrol_world["Daily Oil Consumption (Barrels)"].astype(float)

In [9]:
petrol_world["World Share"]=petrol_world["World Share"].apply(lambda x: x.replace('%',''))
petrol_world["World Share"]=petrol_world["World Share"].astype(float)

In [10]:
petrol_world["GDP Per Capita ( USD )"]=petrol_world["GDP Per Capita ( USD )"].apply(lambda x: x.replace(',',''))
petrol_world["GDP Per Capita ( USD )"]=petrol_world["GDP Per Capita ( USD )"].astype(float)

In [11]:
petrol_world["Gallons GDP Per Capita Can Buy"]=petrol_world["Gallons GDP Per Capita Can Buy"].apply(lambda x: x.replace(',',''))
petrol_world["Gallons GDP Per Capita Can Buy"]=petrol_world["Gallons GDP Per Capita Can Buy"].astype(float)

In [12]:
petrol_world.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 181 entries, 0 to 180
Data columns (total 11 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   S#                                    181 non-null    int64  
 1   Country                               181 non-null    object 
 2   Daily Oil Consumption (Barrels)       181 non-null    float64
 3   World Share                           181 non-null    float64
 4   Yearly Gallons Per Capita             181 non-null    float64
 5   Price Per Gallon (USD)                181 non-null    float64
 6   Price Per Liter (USD)                 181 non-null    float64
 7   Price Per Liter (PKR)                 181 non-null    float64
 8   GDP Per Capita ( USD )                181 non-null    float64
 9   Gallons GDP Per Capita Can Buy        181 non-null    float64
 10  xTimes Yearly Gallons Per Capita Buy  181 non-null    int64  
dtypes: float64(8), int6

# Descriptive Analyst

## Basic Statistic

In [13]:
petrol_world.describe()

Unnamed: 0,S#,Daily Oil Consumption (Barrels),World Share,Yearly Gallons Per Capita,Price Per Gallon (USD),Price Per Liter (USD),Price Per Liter (PKR),GDP Per Capita ( USD ),Gallons GDP Per Capita Can Buy,xTimes Yearly Gallons Per Capita Buy
count,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0,181.0
mean,91.0,533573.0,0.497238,332.00663,5.695691,1.505138,318.219227,15259.790055,4179.342541,14.20442
std,52.394338,1858067.0,1.922453,436.558735,4.370484,1.154575,244.192081,20542.231615,15436.362412,48.613866
min,1.0,51.0,0.0,2.2,0.08,0.02,4.65,274.0,24.0,1.0
25%,46.0,20036.0,0.0,53.9,4.15,1.1,232.02,2033.0,473.0,6.0
50%,91.0,61612.0,0.0,180.2,5.28,1.4,295.04,6127.0,1410.0,9.0
75%,136.0,262352.0,0.0,424.6,6.76,1.79,377.74,20234.0,4103.0,12.0
max,181.0,19687290.0,20.0,3679.5,54.89,14.5,3066.75,115874.0,200700.0,654.0


## Top 10 Yearly Oil Budget

In [14]:
petrol_world['Yearly Oil Budget (USD)'] = petrol_world['Daily Oil Consumption (Barrels)']*365 * petrol_world['Price Per Gallon (USD)']

In [15]:
YOB = petrol_world.sort_values('Yearly Oil Budget (USD)',ascending = False)
YOB

Unnamed: 0,S#,Country,Daily Oil Consumption (Barrels),World Share,Yearly Gallons Per Capita,Price Per Gallon (USD),Price Per Liter (USD),Price Per Liter (PKR),GDP Per Capita ( USD ),Gallons GDP Per Capita Can Buy,xTimes Yearly Gallons Per Capita Buy,Yearly Oil Budget (USD)
0,1,United States,19687287.0,20.0,934.3,5.19,1.37,289.97,63414.0,12218.0,13,3.729461e+10
1,2,China,12791553.0,13.0,138.7,5.42,1.43,302.87,10435.0,1925.0,14,2.530553e+10
2,3,India,4443000.0,5.0,51.4,5.05,1.33,281.93,1901.0,376.0,7,8.189560e+09
3,4,Japan,4012877.0,4.0,481.5,4.69,1.24,262.05,40193.0,8570.0,18,6.869443e+09
9,10,Germany,2383393.0,3.0,444.5,7.65,2.02,427.44,46208.0,6040.0,14,6.655029e+09
...,...,...,...,...,...,...,...,...,...,...,...,...
165,166,British Virgin Islands,1240.0,0.0,647.6,3.60,0.95,200.93,34246.0,9513.0,15,1.629360e+06
179,180,Montserrat,400.0,0.0,1231.1,4.57,1.21,255.07,12589.0,2755.0,2,6.672200e+05
171,172,Kiribati,400.0,0.0,54.5,4.09,1.08,228.42,1671.0,409.0,7,5.971400e+05
177,178,Niue,51.0,0.0,484.4,11.43,3.02,638.73,15586.0,1364.0,3,2.127694e+05


In [24]:
YOB = petrol_world.sort_values('Yearly Oil Budget (USD)',ascending = False)
fig = px.bar(YOB, x= 'Country', y = 'Yearly Oil Budget (USD)', color = "Country", title = 'Yearly Oil Budget (USD)', hover_data = ['Daily Oil Consumption (Barrels)', 'Price Per Gallon (USD)'])
fig.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)
fig.update_layout(
    height = 800,
    width=1600
)
fig.show()

## Top 10 Daily Oil Consumption (Barrels) (High & Low)

In [17]:
fig1 = px.bar(petrol_world, x= 'Country', y = 'Daily Oil Consumption (Barrels)', color = 'Country', title = 'Daily Oil Consumption (Barrels)', hover_data = ['GDP Per Capita ( USD )', 'World Share'])
fig1.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)
fig1.update_layout(
    height = 800,
    width=1600
)
fig1.show()

## Top 10 Price Per Gallon (USD)

In [23]:
PPG.head(20)

Unnamed: 0,S#,Country,Daily Oil Consumption (Barrels),World Share,Yearly Gallons Per Capita,Price Per Gallon (USD),Price Per Liter (USD),Price Per Liter (PKR),GDP Per Capita ( USD ),Gallons GDP Per Capita Can Buy,xTimes Yearly Gallons Per Capita Buy,Yearly Oil Budget (USD)
147,148,North Korea,18000.0,0.0,10.9,54.89,14.5,3066.75,1300.0,24.0,2,360627300.0
180,181,Tonga,899.0,0.0,136.3,16.2,4.28,905.22,4903.0,303.0,2,5315787.0
177,178,Niue,51.0,0.0,484.4,11.43,3.02,638.73,15586.0,1364.0,3,212769.4
40,41,Hong Kong,408491.0,0.0,864.5,11.35,3.0,634.29,46324.0,4081.0,5,1692276000.0
58,59,Norway,204090.0,0.0,595.8,10.22,2.7,571.26,67390.0,6594.0,11,761316900.0
53,54,Denmark,158194.0,0.0,424.6,10.04,2.65,561.11,61063.0,6082.0,14,579717700.0
63,64,Finland,210030.0,0.0,585.7,10.01,2.64,559.21,48773.0,4872.0,8,767376100.0
141,142,Iceland,19090.0,0.0,880.9,9.83,2.6,549.48,59270.0,6030.0,7,68493970.0
47,48,Greece,296101.0,0.0,427.6,9.49,2.51,530.02,17623.0,1857.0,4,1025649000.0
22,23,Netherlands,937098.0,1.0,846.0,9.33,2.47,521.35,52397.0,5616.0,7,3191240000.0


In [20]:
PPG = petrol_world.sort_values('Price Per Gallon (USD)',ascending = False)
fig2 = px.bar(PPG, x= 'Country', y = 'Price Per Gallon (USD)', color = 'Country', title = 'Price Per Gallon (USD)', hover_data = ['GDP Per Capita ( USD )', 'Yearly Gallons Per Capita'])
fig2.update_traces(textfont_size=12, textangle=0, textposition="outside", cliponaxis=False)
fig2.update_layout(
    height = 800,
    width=1600
)
fig2.show()