## Regression Project

### Problem Statement
Agricultural activities have a negative impact on the environment by contributing to CO2 emissions, which in turn drive climate change.

In [2]:
##these are the packages needed for data cleaning and eda
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

### Import the dataset

In [3]:
# Load the emissions dataset from a CSV file; the path is relative to the current working directory
df = pd.read_csv("co2_emissions_from_agri.csv")


### Data Overview
After importing the data, I'm going to get a high level overview to understand its structure.

In [4]:
#inspect the top 5 rows
df.head()

Unnamed: 0,Area,Year,Savanna fires,Forest fires,Crop Residues,Rice Cultivation,Drained organic soils (CO2),Pesticides Manufacturing,Food Transport,Forestland,...,Manure Management,Fires in organic soils,Fires in humid tropical forests,On-farm energy use,Rural population,Urban population,Total Population - Male,Total Population - Female,total_emission,Average Temperature °C
0,Afghanistan,1990,14.7237,0.0557,205.6077,686.0,0.0,11.807483,63.1152,-2388.803,...,319.1763,0.0,0.0,,9655167.0,2593947.0,5348387.0,5346409.0,2198.963539,0.536167
1,Afghanistan,1991,14.7237,0.0557,209.4971,678.16,0.0,11.712073,61.2125,-2388.803,...,342.3079,0.0,0.0,,10230490.0,2763167.0,5372959.0,5372208.0,2323.876629,0.020667
2,Afghanistan,1992,14.7237,0.0557,196.5341,686.0,0.0,11.712073,53.317,-2388.803,...,349.1224,0.0,0.0,,10995568.0,2985663.0,6028494.0,6028939.0,2356.304229,-0.259583
3,Afghanistan,1993,14.7237,0.0557,230.8175,686.0,0.0,11.712073,54.3617,-2388.803,...,352.2947,0.0,0.0,,11858090.0,3237009.0,7003641.0,7000119.0,2368.470529,0.101917
4,Afghanistan,1994,14.7237,0.0557,242.0494,705.6,0.0,11.712073,53.9874,-2388.803,...,367.6784,0.0,0.0,,12690115.0,3482604.0,7733458.0,7722096.0,2500.768729,0.37225


In [5]:
# Swap every space in the column labels for an underscore (updates df's columns in place)
df.columns = df.columns.map(lambda label: label.replace(" ", "_"))

In [6]:
#inspect the bottom 5 rows
df.tail()

Unnamed: 0,Area,Year,Savanna_fires,Forest_fires,Crop_Residues,Rice_Cultivation,Drained_organic_soils_(CO2),Pesticides_Manufacturing,Food_Transport,Forestland,...,Manure_Management,Fires_in_organic_soils,Fires_in_humid_tropical_forests,On-farm_energy_use,Rural_population,Urban_population,Total_Population_-_Male,Total_Population_-_Female,total_emission,Average_Temperature_°C
6960,Zimbabwe,2016,1190.0089,232.5068,70.9451,7.4088,0.0,75.0,251.1465,76500.2982,...,282.5994,0.0,0.0,417.315,10934468.0,5215894.0,6796658.0,7656047.0,98491.026347,1.12025
6961,Zimbabwe,2017,1431.1407,131.1324,108.6262,7.9458,0.0,67.0,255.7975,76500.2982,...,255.59,0.0,0.0,398.1644,11201138.0,5328766.0,6940631.0,7810471.0,97159.311553,0.0465
6962,Zimbabwe,2018,1557.583,221.6222,109.9835,8.1399,0.0,66.0,327.0897,76500.2982,...,257.2735,0.0,0.0,465.7735,11465748.0,5447513.0,7086002.0,7966181.0,97668.308205,0.516333
6963,Zimbabwe,2019,1591.6049,171.0262,45.4574,7.8322,0.0,73.0,290.1893,76500.2982,...,267.5224,0.0,0.0,444.2335,11725970.0,5571525.0,7231989.0,8122618.0,98988.062799,0.985667
6964,Zimbabwe,2020,481.9027,48.4197,108.3022,7.9733,0.0,73.0,238.7639,76500.2982,...,266.7316,0.0,0.0,444.2335,11980005.0,5700460.0,7385220.0,8284447.0,96505.221853,0.189


In [7]:
#Check the shape of the data, i.e., how many columns and rows the dataframe consists of
df.shape

(6965, 31)

The dataframe consists of:
1. Rows = 6965
2. Columns = 31

In [8]:
#Get a summary of the columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6965 entries, 0 to 6964
Data columns (total 31 columns):
 #   Column                           Non-Null Count  Dtype  
---  ------                           --------------  -----  
 0   Area                             6965 non-null   object 
 1   Year                             6965 non-null   int64  
 2   Savanna_fires                    6934 non-null   float64
 3   Forest_fires                     6872 non-null   float64
 4   Crop_Residues                    5576 non-null   float64
 5   Rice_Cultivation                 6965 non-null   float64
 6   Drained_organic_soils_(CO2)      6965 non-null   float64
 7   Pesticides_Manufacturing         6965 non-null   float64
 8   Food_Transport                   6965 non-null   float64
 9   Forestland                       6472 non-null   float64
 10  Net_Forest_conversion            6472 non-null   float64
 11  Food_Household_Consumption       6492 non-null   float64
 12  Food_Retail         

- Population values should be integers, temperature should be float

- Each column should have a total of 6965 non-null values. 

- Notice that some columns have null values that will need to be looked at. 

#### Change Area column name to Country

In [9]:
df = df.rename(columns = {'Area': 'Country'})

In [10]:
df.head()

Unnamed: 0,Country,Year,Savanna_fires,Forest_fires,Crop_Residues,Rice_Cultivation,Drained_organic_soils_(CO2),Pesticides_Manufacturing,Food_Transport,Forestland,...,Manure_Management,Fires_in_organic_soils,Fires_in_humid_tropical_forests,On-farm_energy_use,Rural_population,Urban_population,Total_Population_-_Male,Total_Population_-_Female,total_emission,Average_Temperature_°C
0,Afghanistan,1990,14.7237,0.0557,205.6077,686.0,0.0,11.807483,63.1152,-2388.803,...,319.1763,0.0,0.0,,9655167.0,2593947.0,5348387.0,5346409.0,2198.963539,0.536167
1,Afghanistan,1991,14.7237,0.0557,209.4971,678.16,0.0,11.712073,61.2125,-2388.803,...,342.3079,0.0,0.0,,10230490.0,2763167.0,5372959.0,5372208.0,2323.876629,0.020667
2,Afghanistan,1992,14.7237,0.0557,196.5341,686.0,0.0,11.712073,53.317,-2388.803,...,349.1224,0.0,0.0,,10995568.0,2985663.0,6028494.0,6028939.0,2356.304229,-0.259583
3,Afghanistan,1993,14.7237,0.0557,230.8175,686.0,0.0,11.712073,54.3617,-2388.803,...,352.2947,0.0,0.0,,11858090.0,3237009.0,7003641.0,7000119.0,2368.470529,0.101917
4,Afghanistan,1994,14.7237,0.0557,242.0494,705.6,0.0,11.712073,53.9874,-2388.803,...,367.6784,0.0,0.0,,12690115.0,3482604.0,7733458.0,7722096.0,2500.768729,0.37225


#### Check the sum of NaN values in each column

In [11]:
# checking for missing values
df.isnull().sum()

Country                               0
Year                                  0
Savanna_fires                        31
Forest_fires                         93
Crop_Residues                      1389
Rice_Cultivation                      0
Drained_organic_soils_(CO2)           0
Pesticides_Manufacturing              0
Food_Transport                        0
Forestland                          493
Net_Forest_conversion               493
Food_Household_Consumption          473
Food_Retail                           0
On-farm_Electricity_Use               0
Food_Packaging                        0
Agrifood_Systems_Waste_Disposal       0
Food_Processing                       0
Fertilizers_Manufacturing             0
IPPU                                743
Manure_applied_to_Soils             928
Manure_left_on_Pasture                0
Manure_Management                   928
Fires_in_organic_soils                0
Fires_in_humid_tropical_forests     155
On-farm_energy_use                  956


In [12]:
# Percentage of missing values per column: NaN count divided by the number of rows, times 100
column_mv = df.isnull().sum().div(len(df)).mul(100)
column_mv

Country                             0.000000
Year                                0.000000
Savanna_fires                       0.445083
Forest_fires                        1.335248
Crop_Residues                      19.942570
Rice_Cultivation                    0.000000
Drained_organic_soils_(CO2)         0.000000
Pesticides_Manufacturing            0.000000
Food_Transport                      0.000000
Forestland                          7.078248
Net_Forest_conversion               7.078248
Food_Household_Consumption          6.791098
Food_Retail                         0.000000
On-farm_Electricity_Use             0.000000
Food_Packaging                      0.000000
Agrifood_Systems_Waste_Disposal     0.000000
Food_Processing                     0.000000
Fertilizers_Manufacturing           0.000000
IPPU                               10.667624
Manure_applied_to_Soils            13.323762
Manure_left_on_Pasture              0.000000
Manure_Management                  13.323762
Fires_in_o

We have quite a few columns missing values, but the % of missing values in each column is not enough to drop the entire column from the dataset.

#### Investigate missing values in each column

In [13]:
pd.set_option('display.max_columns', 50)

#### Savanna fires

In [14]:
#savanna fires
df[df['Savanna_fires'].isnull()]


Unnamed: 0,Country,Year,Savanna_fires,Forest_fires,Crop_Residues,Rice_Cultivation,Drained_organic_soils_(CO2),Pesticides_Manufacturing,Food_Transport,Forestland,Net_Forest_conversion,Food_Household_Consumption,Food_Retail,On-farm_Electricity_Use,Food_Packaging,Agrifood_Systems_Waste_Disposal,Food_Processing,Fertilizers_Manufacturing,IPPU,Manure_applied_to_Soils,Manure_left_on_Pasture,Manure_Management,Fires_in_organic_soils,Fires_in_humid_tropical_forests,On-farm_energy_use,Rural_population,Urban_population,Total_Population_-_Male,Total_Population_-_Female,total_emission,Average_Temperature_°C
2835,Holy See,1990,,,,9616.267306,0.0,11.481085,26.262663,0.0,0.0,0.0018,0.0466,112.135517,67.631366,65.072347,451.196291,2361.124955,0.6382,,5976.490513,,0.058149,,,0.0,768.0,340.0,370.0,18688.406793,0.905583
2836,Holy See,1991,,,,9654.499823,0.0,11.481085,26.262663,0.0,0.0,0.0018,0.0457,112.135517,67.631366,65.072347,451.196291,2361.124955,0.6414,,5976.490513,,0.058149,,,0.0,774.0,330.0,360.0,18726.64161,-0.098083
2837,Holy See,1992,,,,9616.267306,0.0,11.481085,26.262663,0.0,0.0,0.0018,0.0433,112.135517,67.631366,65.072347,451.196291,2357.644701,0.6399,,5976.490513,,0.058149,,,0.0,779.0,330.0,360.0,18684.924938,0.773167
2838,Holy See,1993,,,,9616.267306,0.0,11.481085,26.262663,0.0,0.0,0.0107,0.023,112.135517,67.631366,65.072347,451.196291,2357.644701,0.6012,,5976.490513,,0.058149,,,0.0,778.0,330.0,360.0,18684.874838,0.564417
2839,Holy See,1994,,,,9649.123125,0.0,11.481085,26.262663,0.0,0.0,0.0184,0.0254,112.135517,67.631366,65.072347,451.196291,2357.644701,0.5894,,5976.490513,,0.058149,,,0.0,778.0,320.0,350.0,18717.728958,1.4445
2840,Holy See,1995,,,,9687.355642,0.0,11.481085,26.262663,0.0,0.0,0.0024,0.0095,112.135517,67.631366,65.072347,451.196291,2357.644701,0.6334,,5976.490513,,0.058149,,,0.0,780.0,320.0,350.0,18755.973575,0.267167
2841,Holy See,1996,,,,9687.355642,0.0,11.481085,26.262663,0.0,0.0,0.0031,0.0146,112.135517,67.631366,65.072347,451.196291,2357.644701,0.636,,5976.490513,,0.058149,,,0.0,778.0,320.0,350.0,18755.981975,0.245167
2842,Holy See,1997,,,,9649.123125,0.0,11.481085,26.262663,0.0,0.0,0.0029,0.0114,112.135517,67.631366,65.072347,451.196291,2357.644701,0.6444,,5976.490513,,0.058149,,,0.0,782.0,320.0,340.0,18717.754458,0.889583
2843,Holy See,1998,,,,9649.123125,0.0,11.481085,26.262663,0.0,0.0,0.003,0.0201,112.135517,67.631366,65.072347,451.196291,2357.644701,0.6658,,5976.490513,,0.058149,,,0.0,781.0,320.0,340.0,18717.784658,0.77825
2844,Holy See,1999,,,,9649.123125,0.0,11.481085,26.262663,0.0,0.0,0.0025,0.0271,112.135517,67.631366,65.072347,451.196291,2357.644701,0.6923,,5976.490513,,0.058149,,,0.0,781.0,310.0,340.0,18717.817658,0.9265


#### Notes:
Holy See

These missing values seem to be missing at random (MAR), i.e. the missingness is explained by another observed variable rather than by chance alone. Why? There's only one country that's missing data for this column, so the gaps are tied to the country itself. This may be related to the fact that this country has no savanna. We will remove these rows, as quite a few features for this country have missing values.

Removing the rows in the next block. 

In [15]:
# Index labels of every Holy See record in the original frame
holy_see_rows = df.loc[df['Country'] == 'Holy See'].index

# drop() returns a new frame, so df_copy is an independent copy of df
# without the Holy See records and df itself is left untouched
df_copy = df.drop(index=holy_see_rows)


#### Forest fires

In [16]:
pd.set_option('display.max_rows', 200)

In [18]:
df_copy[df_copy['Forest_fires'].isnull()].head()

Unnamed: 0,Country,Year,Savanna_fires,Forest_fires,Crop_Residues,Rice_Cultivation,Drained_organic_soils_(CO2),Pesticides_Manufacturing,Food_Transport,Forestland,Net_Forest_conversion,Food_Household_Consumption,Food_Retail,On-farm_Electricity_Use,Food_Packaging,Agrifood_Systems_Waste_Disposal,Food_Processing,Fertilizers_Manufacturing,IPPU,Manure_applied_to_Soils,Manure_left_on_Pasture,Manure_Management,Fires_in_organic_soils,Fires_in_humid_tropical_forests,On-farm_energy_use,Rural_population,Urban_population,Total_Population_-_Male,Total_Population_-_Female,total_emission,Average_Temperature_°C
4117,Monaco,1990,0.0,,,8976.562525,0.0,11.481085,0.0657,0.0,0.0,0.0082,0.0222,17.004663,67.631366,3.5176,451.196291,1762.765252,32.6932,,2294.307917,,0.058149,,,0.0,29439.0,14435.0,15894.0,13617.314148,1.02675
4118,Monaco,1991,0.0,,,9014.795042,0.0,11.481085,0.0051,0.0,0.0,0.0009,0.0713,17.004663,67.631366,3.5943,0.0307,1762.765252,32.5885,,2294.307917,,0.058149,,,0.0,29624.0,14577.0,15987.0,13204.334274,0.223833
4119,Monaco,1992,0.0,,,8976.562525,0.0,11.481085,0.0308,0.0,0.0,0.0054,0.0385,17.004663,67.631366,3.6675,451.196291,1762.765252,33.5705,,2294.307917,,0.058149,,,0.0,29863.0,14722.0,16073.0,13618.319948,0.757167
4120,Monaco,1993,0.0,,,8976.562525,0.0,11.481085,0.0056,0.0,0.0,0.0118,0.0461,17.004663,67.631366,3.7309,451.196291,1762.765252,34.7454,,2294.307917,,0.058149,,,0.0,30138.0,14864.0,16160.0,13619.547048,0.493333
4121,Monaco,1994,0.0,,,9009.418345,0.0,11.481085,0.0046,0.0,0.0,0.0122,0.0959,17.004663,67.631366,3.7915,451.196291,1762.765252,33.6051,,2294.307917,,0.058149,,,0.0,30427.0,15002.0,16252.0,13651.372368,1.346


It seems like both countries above (Holy See and Monaco) have small populations and contribute very little to the dataset. I'm going to write a function that goes through each country in the df and brings back all countries that have missing values in several features.

In [54]:
def missing_values_features(df, counter):
    """Return the countries with missing values in at least `counter` features.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to inspect. It must contain a 'Country' column; the rows of each
        country are checked together.
    counter : int
        Minimum number of columns (inclusive) that must contain at least one
        NaN within a country's rows for that country to be reported.

    Returns
    -------
    list
        Country names, in order of first appearance in `df`.
    """
    countries_with_many_missing_features = []

    # One pass over the frame: groupby hands us each country's rows directly,
    # and sort=False keeps the countries in their original order.
    for country, country_rows in df.groupby('Country', sort=False):
        # Number of columns that contain at least one NaN for this country
        num_missing_features = country_rows.isnull().any().sum()

        # Inclusive threshold: exactly `counter` missing features also qualifies
        if num_missing_features >= counter:
            countries_with_many_missing_features.append(country)

    return countries_with_many_missing_features

missing_values_features(df_copy, 6)


['Bermuda',
 'British Virgin Islands',
 'Channel Islands',
 'Guam',
 'Isle of Man',
 'Monaco',
 'Northern Mariana Islands',
 'Palau',
 'Palestine',
 'San Marino',
 'United States Virgin Islands',
 'Western Sahara']

Based on the above outputs, it seems as if some of the entries in the Country column are territories, dependencies, or small parts of a country rather than countries in their own right, and some are countries that no longer exist. For example, we have a country called Czechoslovakia, which was dissolved at the end of 1992; its successor Czechia is also found in the dataset, starting in 1993. See below.

In [29]:
df[df['Country'] == 'Czechia'].head()

Unnamed: 0,Country,Year,Savanna_fires,Forest_fires,Crop_Residues,Rice_Cultivation,Drained_organic_soils_(CO2),Pesticides_Manufacturing,Food_Transport,Forestland,Net_Forest_conversion,Food_Household_Consumption,Food_Retail,On-farm_Electricity_Use,Food_Packaging,Agrifood_Systems_Waste_Disposal,Food_Processing,Fertilizers_Manufacturing,IPPU,Manure_applied_to_Soils,Manure_left_on_Pasture,Manure_Management,Fires_in_organic_soils,Fires_in_humid_tropical_forests,On-farm_energy_use,Rural_population,Urban_population,Total_Population_-_Male,Total_Population_-_Female,total_emission,Average_Temperature_°C
1695,Czechia,1993,0.0722,0.4831,419.6897,926.342278,280.3963,92.0,593.8016,-9798.65,229.8021,4168.5719,1475.0309,1389.9388,966.0299,1243.2022,2129.5889,4.549,5659.7171,814.6467,304.3294,1628.5952,0.0,0.0,2821.7952,2599961.0,7759011.0,5012029.0,5306888.0,15349.932478,0.490333
1696,Czechia,1994,0.0722,0.4831,432.7742,926.342278,280.2437,84.0,648.1824,-9798.65,229.8021,4837.0933,1475.4785,1260.2141,1187.3018,1302.6135,2418.399,6.3994,6297.8068,709.4324,261.8771,1428.7936,0.0,0.0,2870.8935,2613909.0,7747333.0,5015353.0,5305949.0,16859.552978,1.789667
1697,Czechia,1995,0.0722,0.4831,421.6088,926.342278,279.6144,94.0,554.1403,-9798.65,229.8021,5501.922,1506.2995,1280.7947,1229.3956,1388.5585,2455.9861,8.1405,6119.9692,677.7416,245.6881,1357.0556,0.0,0.0,3420.9599,2626627.0,7731829.0,5012565.0,5299136.0,17899.924478,0.72725
1698,Czechia,1996,0.0116,0.34,426.803,926.342278,278.5743,72.0,802.0808,-9798.65,229.8021,5825.6669,1451.2603,1254.3415,1248.0879,1531.4458,2538.7907,10.0195,6274.6511,675.7912,238.6546,1327.0697,0.0,0.0,1778.225,2638101.0,7712469.0,5006368.0,5289283.0,17091.308278,-0.740417
1699,Czechia,1997,0.0109,0.3262,441.2307,926.342278,280.6481,67.0,823.9536,-9798.65,229.8021,5530.3186,1477.3918,976.3375,1208.6799,1476.2474,2574.1596,11.998,6194.12,653.4161,222.5543,1311.5976,0.0,0.0,1359.1589,2648516.0,7690077.0,4999755.0,5279290.0,15966.643578,0.566667


Based on the above, I'm going to go with an approach that only looks at entries that are:
1. Countries, and not provinces or parts of a country.
2. Modern-day countries.

Once we deal with these missing values, we can follow an approach to remove all the countries that still have missing values. 

#### Afghanistan

In [37]:

# Afghanistan has no recorded 'On-farm_energy_use', so replace its NaNs with 0.
# Build the row mask once and reuse it for the read, the write and the display.
is_afghanistan = df_copy['Country'] == 'Afghanistan'
filled_energy_use = df_copy.loc[is_afghanistan, 'On-farm_energy_use'].fillna(0)
df_copy.loc[is_afghanistan, 'On-farm_energy_use'] = filled_energy_use

df_copy[is_afghanistan]

Unnamed: 0,Country,Year,Savanna_fires,Forest_fires,Crop_Residues,Rice_Cultivation,Drained_organic_soils_(CO2),Pesticides_Manufacturing,Food_Transport,Forestland,Net_Forest_conversion,Food_Household_Consumption,Food_Retail,On-farm_Electricity_Use,Food_Packaging,Agrifood_Systems_Waste_Disposal,Food_Processing,Fertilizers_Manufacturing,IPPU,Manure_applied_to_Soils,Manure_left_on_Pasture,Manure_Management,Fires_in_organic_soils,Fires_in_humid_tropical_forests,On-farm_energy_use,Rural_population,Urban_population,Total_Population_-_Male,Total_Population_-_Female,total_emission,Average_Temperature_°C
0,Afghanistan,1990,14.7237,0.0557,205.6077,686.0,0.0,11.807483,63.1152,-2388.803,0.0,79.0851,109.6446,14.2666,67.631366,691.7888,252.21419,11.997,209.9778,260.1431,1590.5319,319.1763,0.0,0.0,0.0,9655167.0,2593947.0,5348387.0,5346409.0,2198.963539,0.536167
1,Afghanistan,1991,14.7237,0.0557,209.4971,678.16,0.0,11.712073,61.2125,-2388.803,0.0,80.4885,116.6789,11.4182,67.631366,710.8212,252.21419,12.8539,217.0388,268.6292,1657.2364,342.3079,0.0,0.0,0.0,10230490.0,2763167.0,5372959.0,5372208.0,2323.876629,0.020667
2,Afghanistan,1992,14.7237,0.0557,196.5341,686.0,0.0,11.712073,53.317,-2388.803,0.0,80.7692,126.1721,9.2752,67.631366,743.6751,252.21419,13.4929,222.1156,264.7898,1653.5068,349.1224,0.0,0.0,0.0,10995568.0,2985663.0,6028494.0,6028939.0,2356.304229,-0.259583
3,Afghanistan,1993,14.7237,0.0557,230.8175,686.0,0.0,11.712073,54.3617,-2388.803,0.0,85.0678,81.4607,9.0635,67.631366,791.9246,252.21419,14.0559,201.2057,261.7221,1642.9623,352.2947,0.0,0.0,0.0,11858090.0,3237009.0,7003641.0,7000119.0,2368.470529,0.101917
4,Afghanistan,1994,14.7237,0.0557,242.0494,705.6,0.0,11.712073,53.9874,-2388.803,0.0,88.8058,90.4008,8.3962,67.631366,831.9181,252.21419,15.1269,182.2905,267.6219,1689.3593,367.6784,0.0,0.0,0.0,12690115.0,3482604.0,7733458.0,7722096.0,2500.768729,0.37225
5,Afghanistan,1995,14.7237,0.0557,243.8152,666.4,0.0,11.712073,54.6445,-2388.803,0.0,90.1626,98.868,7.7721,67.631366,863.04,252.21419,15.9118,174.3647,275.2359,1779.3139,397.5498,0.0,0.0,0.0,13401971.0,3697570.0,8219467.0,8199445.0,2624.612529,0.285583
6,Afghanistan,1996,38.9302,0.2014,249.0364,686.0,0.0,11.712073,53.1637,-2388.803,0.0,93.7905,21.6458,7.2221,67.631366,887.8772,252.21419,16.9535,165.423,310.1306,1900.5873,465.205,0.0,0.0,0.0,13952791.0,3870093.0,8569175.0,8537421.0,2838.921329,0.036583
7,Afghanistan,1997,30.9378,0.1193,276.294,705.6,0.0,11.712073,52.039,-2388.803,0.0,93.9696,28.2132,7.4401,67.631366,905.8111,269.476476,18.1079,164.4681,338.9329,2110.6375,511.5927,0.0,0.0,0.0,14373573.0,4008032.0,8916862.0,8871958.0,3204.180115,0.415167
8,Afghanistan,1998,64.1411,0.3263,287.4346,705.6,0.0,11.712073,52.705,-2388.803,0.0,95.2597,30.887,6.5105,67.631366,916.4552,318.548422,19.1808,163.5052,362.5683,2305.3943,541.6598,0.0,0.0,0.0,14733655.0,4130344.0,9275541.0,9217591.0,3560.716661,0.890833
9,Afghanistan,1999,46.1683,0.0895,247.498,548.8,0.0,11.712073,35.763,-2388.803,0.0,98.9876,39.4317,5.5415,67.631366,924.9281,306.779994,20.4208,163.5503,400.5558,2554.6904,611.0611,0.0,0.0,0.0,15137497.0,4266179.0,9667811.0,9595036.0,3694.806533,1.0585


In [42]:
pd.set_option('display.max_columns', 500)
df_copy[df_copy['On-farm_energy_use'].isna()]

Unnamed: 0,Country,Year,Savanna_fires,Forest_fires,Crop_Residues,Rice_Cultivation,Drained_organic_soils_(CO2),Pesticides_Manufacturing,Food_Transport,Forestland,Net_Forest_conversion,Food_Household_Consumption,Food_Retail,On-farm_Electricity_Use,Food_Packaging,Agrifood_Systems_Waste_Disposal,Food_Processing,Fertilizers_Manufacturing,IPPU,Manure_applied_to_Soils,Manure_left_on_Pasture,Manure_Management,Fires_in_organic_soils,Fires_in_humid_tropical_forests,On-farm_energy_use,Rural_population,Urban_population,Total_Population_-_Male,Total_Population_-_Female,total_emission,Average_Temperature_°C
461,Bahrain,1990,0.000,0.0,0.0006,246.407276,0.0,0.000000,116.037800,-1.8626,0.0,181.3300,154.908500,21.140100,625.580400,320.760600,247.314516,70.493900,4237.9918,0.6294,12.219800,1.5460,0.0,0.0,,58816.0,437115.0,302861.0,214558.0,6234.498093,0.652750
462,Bahrain,1991,0.000,0.0,0.0006,246.407276,0.0,1.000000,117.131600,-1.8626,0.0,176.0500,138.968400,12.861000,601.402300,331.068600,209.587728,86.647300,1731.5459,0.6363,11.906300,1.5451,0.0,0.0,,59494.0,450271.0,313375.0,222040.0,3664.895805,-0.267417
463,Bahrain,1992,0.000,0.0,0.0010,246.407276,0.0,0.000000,129.901400,-1.8626,0.0,206.0455,144.244700,17.381800,620.861800,340.325300,209.587728,101.102200,1944.8331,0.6505,12.076200,1.5533,0.0,0.0,,60688.0,462399.0,324822.0,229648.0,3973.109205,-0.679583
464,Bahrain,1993,0.000,0.0,0.0013,246.407276,0.0,0.000000,125.262100,-1.8626,0.0,205.0604,118.953700,18.151200,758.518100,348.451000,209.587728,114.597800,2337.2729,0.6608,11.885100,1.5549,0.0,0.0,,62228.0,473985.0,336425.0,237331.0,4494.501705,0.239417
465,Bahrain,1994,0.000,0.0,0.0015,246.407276,0.0,0.000000,132.027400,-1.8626,0.0,230.1559,149.063200,21.710900,757.732100,355.855200,209.587728,132.160600,2112.1978,0.6677,11.592400,1.5412,0.0,0.0,,63798.0,485790.0,348166.0,245088.0,4358.838305,0.822167
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6866,Western Sahara,2017,0.000,0.0,,926.342278,0.0,252.742028,2298.021251,-104.5657,0.0,,1266.337019,643.580231,1186.432655,125.806542,2369.112862,2012.447178,,,2148.251693,,0.0,0.0,,73931.0,478697.0,286418.0,232440.0,13124.508039,2.089667
6867,Western Sahara,2018,0.000,0.0,,926.342278,0.0,252.742028,2298.021251,-104.5657,0.0,,1259.026036,643.580231,1186.432655,128.229188,2369.112862,2012.447178,,,2148.251693,,0.0,0.0,,75548.0,491873.0,293827.0,238176.0,13119.619702,1.033500
6868,Western Sahara,2019,0.000,0.0,,926.342278,0.0,252.742028,2298.021251,-104.5657,0.0,,1266.337019,643.580231,1186.432655,138.576362,2369.112862,2012.447178,,,2148.251693,,0.0,0.0,,77150.0,505328.0,301060.0,243815.0,13137.277858,1.432333
6869,Western Sahara,2020,0.000,0.0,,926.342278,0.0,252.742028,2298.021251,-104.5657,0.0,,1266.337019,643.580231,1186.432655,154.417436,2369.112862,2012.447178,,,2148.251693,,0.0,0.0,,78676.0,518663.0,307187.0,248862.0,13153.118933,2.068000
