# NumPy Practice Project
Using NumPy, explore and analyze the data present in the publicly available file
from the U.S. Energy Information Administration which contains information on electricity
generation in the USA from a range of sources. The data file is a CSV file, so use the csv
module to import the data into a list which should be converted to an appropriate numpy
array(as shown below)
Link to the dataset:
https://drive.google.com/file/d/1Cz0Yvx4yES8ydhLaBSnclIPxW2yKHwWH/view?usp=sharing

In [19]:
import numpy as np
import csv
data = []
with open(r"C:\Users\HP\Desktop\Data-LB\New folder\MER_T07_02A-2020-02-03.csv") as csvfile:
    file_reader = csv.reader(csvfile,delimiter = ',')
    for row in file_reader:
        data.append(row)
data = np.array(data)       

In [20]:
data

array([['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'],
       ['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['ELETPUS', '201908', '401363.186', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201909', '359300.603', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201910', '321921.812', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours']], dtype='<U80')

# 1. Explore the important attributes like dimension,shape, data type etc, of the array formed above.

In [21]:
##dimmension
np.ndim(data)


2

In [22]:
#shape
data.shape

(8217, 6)

In [23]:
data.dtype

dtype('<U80')

# 2. Print the data contained in the first 10 rows of the 4th column.

In [24]:
data[0:10,3:4]

array([['Column_Order'],
       ['1'],
       ['1'],
       ['1'],
       ['1'],
       ['1'],
       ['1'],
       ['1'],
       ['1'],
       ['1']], dtype='<U80')

# 3. Which row serves as the headers/titles for all the columns.

First row srves as header

In [25]:
data[0:1]

array([['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit']],
      dtype='<U80')

# 4. Print the data contained in column 2 and 3 from row 1 till row 20

In [26]:
data[0:20,1:3]

array([['YYYYMM', 'Value'],
       ['194913', '135451.32'],
       ['195013', '154519.994'],
       ['195113', '185203.657'],
       ['195213', '195436.666'],
       ['195313', '218846.325'],
       ['195413', '239145.966'],
       ['195513', '301362.698'],
       ['195613', '338503.484'],
       ['195713', '346386.207'],
       ['195813', '344365.781'],
       ['195913', '378424.21'],
       ['196013', '403067.357'],
       ['196113', '421870.669'],
       ['196213', '450249.238'],
       ['196313', '493926.719'],
       ['196413', '526230.019'],
       ['196513', '570925.951'],
       ['196613', '613474.8'],
       ['196713', '630483.363']], dtype='<U80')

# 5. Print the data present in only the first three and the last three rows of all the columns in a single output.


In [27]:
data_1 = data[0:3]
data_2 = data[-3:]
np.vstack((data_1,data_2))

array([['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'],
       ['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201908', '401363.186', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201909', '359300.603', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201910', '321921.812', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours']], dtype='<U80')

# 6. Sort the data on the basis of net amount of electricity generated irrespective of the source.


In [28]:
val = data[1:,2:3]
val.shape

(8216, 1)

In [29]:
cond = val[:]!= 'Not Available'
val = val[cond].astype(float)

In [30]:
val.shape

(7386,)

In [31]:
np.sort(val)

array([  -8823.445,   -8742.928,   -8535.065, ..., 4125059.9  ,
       4156744.724, 4174397.655])

In [32]:
#net amount of electricity
np.sum(val)

642969289.517

# 7. Find the total amount of electricity generated using coal and nuclear between1949-1990. ( In this dataset, rows containing monthly data express date in the format 'YYYYMM'. Rows containing annual data express the date in the format 'YYYY13'.)


In [33]:
data[0:2]

array([['MSN', 'YYYYMM', 'Value', 'Column_Order', 'Description', 'Unit'],
       ['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours']], dtype='<U80')

In [45]:
c = data[1:]

In [46]:
c1 = data[1:,1].astype(int)

In [47]:
c1

array([194913, 195013, 195113, ..., 201908, 201909, 201910])

In [48]:
cond_1 = np.mod(c1,100)==13

In [49]:
energy = c[cond_1]

In [50]:
energy

array([['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195113', '185203.657', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['ELETPUS', '201613', '4076674.984', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201713', '4034270.522', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201813', '4174397.655', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours']], dtype='<U80')

In [52]:
np.unique(data[:,4])

array(['Description', 'Electricity Net Generation From Coal, All Sectors',
       'Electricity Net Generation From Conventional Hydroelectric Power, All Sectors',
       'Electricity Net Generation From Geothermal, All Sectors',
       'Electricity Net Generation From Hydroelectric Pumped Storage, All Sectors',
       'Electricity Net Generation From Natural Gas, All Sectors',
       'Electricity Net Generation From Nuclear Electric Power, All Sectors',
       'Electricity Net Generation From Other Gases, All Sectors',
       'Electricity Net Generation From Petroleum, All Sectors',
       'Electricity Net Generation From Solar, All Sectors',
       'Electricity Net Generation From Waste, All Sectors',
       'Electricity Net Generation From Wind, All Sectors',
       'Electricity Net Generation From Wood, All Sectors',
       'Electricity Net Generation Total (including from sources not shown), All Sectors'],
      dtype='<U80')

In [55]:
cond_1 = energy[:,1].astype(int) <=199013

In [58]:
e1 = energy[cond_1]

In [59]:
cond_2 = e1[:,4] =='Electricity Net Generation From Coal, All Sectors'

In [60]:
e2 = e1[cond_2]

In [61]:
cond_3 = e1[:,4]=='Electricity Net Generation From Nuclear Electric Power, All Sectors'

In [62]:
e3 = e1[cond_3]

In [63]:
final_energy = np.vstack((e2,e3))

In [64]:
final_energy

array([['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195113', '185203.657', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195213', '195436.666', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195313', '218846.325', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195413', '239145.966', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195513', '301362.698', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],


In [66]:
elct = final_energy[:,2]

In [69]:
np.sort(elct)

array(['0', '0', '0', '0', '0', '0', '0', '0', '1075037.091', '113975.74',
       '1161562.368', '1192004.204', '1203203.232', '12528.419',
       '1259424.279', '1341680.752', '135451.32', '1385831.452',
       '13927.839', '1402128.125', '1463781.289', '1540652.774',
       '154519.994', '1583779.139', '1594011.479', '164.691', '1692.149',
       '172505.075', '185203.657', '188.101', '191103.531', '195436.666',
       '21804.448', '218846.325', '2269.685', '239145.966', '250883.283',
       '251115.575', '255154.623', '272673.503', '276403.07',
       '282773.248', '293677.119', '301362.698', '3211.836', '327633.549',
       '3342.743', '338503.484', '344365.781', '346386.207', '3656.699',
       '378424.21', '38104.545', '383690.727', '403067.357', '414038.063',
       '421870.669', '450249.238', '455270.382', '493926.719', '518.182',
       '526230.019', '526973.047', '529354.717', '54091.135', '5519.909',
       '570925.951', '576861.678', '613474.8', '630483.363', '684904.58',
 

In [71]:
#net electricity generated by coal and neuclear power in 1949 to 1990
np.sum(elct.astype(float))

37990978.569000006

# 8. Print all the unique sources of Energy generation present in the dataset.


In [72]:
np.unique(data[:,4])

array(['Description', 'Electricity Net Generation From Coal, All Sectors',
       'Electricity Net Generation From Conventional Hydroelectric Power, All Sectors',
       'Electricity Net Generation From Geothermal, All Sectors',
       'Electricity Net Generation From Hydroelectric Pumped Storage, All Sectors',
       'Electricity Net Generation From Natural Gas, All Sectors',
       'Electricity Net Generation From Nuclear Electric Power, All Sectors',
       'Electricity Net Generation From Other Gases, All Sectors',
       'Electricity Net Generation From Petroleum, All Sectors',
       'Electricity Net Generation From Solar, All Sectors',
       'Electricity Net Generation From Waste, All Sectors',
       'Electricity Net Generation From Wind, All Sectors',
       'Electricity Net Generation From Wood, All Sectors',
       'Electricity Net Generation Total (including from sources not shown), All Sectors'],
      dtype='<U80')

# 9. Print all the details(annual) where the energy source is Wind Energy. Use the concept of masking to filter the data.

In [74]:
energy

array([['CLETPUS', '194913', '135451.32', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195013', '154519.994', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ['CLETPUS', '195113', '185203.657', '1',
        'Electricity Net Generation From Coal, All Sectors',
        'Million Kilowatthours'],
       ...,
       ['ELETPUS', '201613', '4076674.984', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201713', '4034270.522', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours'],
       ['ELETPUS', '201813', '4174397.655', '13',
        'Electricity Net Generation Total (including from sources not shown), All Sectors',
        'Million Kilowatthours']], dtype='<U80')

In [75]:
cond_4 = energy[:,4] == 'Electricity Net Generation From Wind, All Sectors'

In [76]:
wind_energy = energy[cond_4]

In [78]:
cond_5 = wind_energy[:,2] != 'Not Available'

In [79]:
wind_energy = wind_energy[cond_5]

In [80]:
np.sum(wind_energy[:,2].astype(float))

1940339.685

# 10. Print the Total Energy generated in the USA till date.

In [81]:
np.sum(val)

642969289.517

# 11. Print the average annual energy generated from wind in the USA and also the standard deviation present in the energy generation.


In [82]:
wind_energy

array([['WYETPUS', '198313', '2.668', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '198413', '6.49', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '198513', '5.762', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '198613', '4.189', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '198713', '3.541', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '198813', '0.871', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '198913', '2112.043', '12',
        'Electricity Net Generation From Wind, All Sectors',
        'Million Kilowatthours'],
       ['WYETPUS', '19901

In [83]:
w = wind_energy[:,2].astype(float)

In [84]:
#average
np.mean(w)

53898.324583333335

In [85]:
#standard deviation
np.std(w)

81322.74833843806

# 12. What and when was the maximum annual energy generated?


In [86]:
e1 = data[1:]

In [87]:
cond_6 = e1[:,2] !='Not Available'

In [88]:
e2 = e1[cond_6]

In [89]:
max_energy = np.max(e2[:,2].astype(float))

In [90]:
max_energy

4174397.655

In [91]:
cond_7 = np.argmax(e2[:,2].astype(float))

In [93]:
e2[cond_7][1]

'201813'