In [64]:
# Import CSV for parsing our data, numpy for math
import csv
import numpy as np

In [65]:
# Basic Data Model - Our objects will store relevant data
class IndustryModel:
    """Plain record holding an industry, a subindustry and a total revenue value."""

    def __init__(self,industry,subindustry,total_revenue):
        """Store the three constructor arguments unchanged as attributes."""
        self.industry, self.subindustry, self.total_revenue = (
            industry,
            subindustry,
            total_revenue,
        )

    def __str__(self):
        """Return the text ``<industry> : <subindustry> $<total_revenue>``."""
        # industry and subindustry are joined as strings; only the revenue
        # is converted with str().
        label = " : ".join((self.industry, self.subindustry))
        return label + " $" + str(self.total_revenue)

In [66]:
# List to hold all of our objects
industries = []
# With statement to open/close our file automatically.
# newline='' hands line-ending handling to the csv module, as its
# documentation asks for; without it, newlines embedded in quoted fields
# may be misread.
with open('data.csv', 'r', newline='') as csv_file:
    # csv.reader will go through the file row by row
    reader = csv.reader(csv_file)
    # Counts every row seen, header included
    line_count = 0
    for row in reader:
        line_count += 1
        # Ignore the header (first row). Also skip blank lines, which
        # csv.reader yields as [] and which would otherwise raise
        # IndexError on row[0].
        if line_count == 1 or not row:
            continue
        # Create one object for each data row; the third column is parsed
        # as an integer revenue.
        rowModel = IndustryModel(row[0], row[1], int(row[2]))
        # Add industry object to our list
        industries.append(rowModel)

In [67]:
# Show every loaded record on its own line, formatted by IndustryModel.__str__
for record in industries:
    print(str(record))

Manufacturing : Consumer Electronics & Computers Retail $86153757
Retail : Automobile Dealers $56803298
Retail : Grocery Retail $54269326
Retail : Department Stores, Shopping Centers & Superstores $43550263
Retail : Apparel & Accessories Retail $20639533
Retail : Flowers, Gifts & Specialty Stores $17827120
Manufacturing : Automobile Dealers $15383793
Retail : Home Improvement & Hardware Retail $8162494
Retail : Vitamins, Supplements & Health Stores $5895492
Retail : Pet Products $5630498
Manufacturing : Toys & Games $5298149
Business Services : Automobile Dealers $4464523
Retail : Sporting & Recreational Equipment Retail $3342220
Retail : Furniture $3189395
Construction : Grocery Retail $2962846
Manufacturing : Flowers, Gifts & Specialty Stores $2590605
Manufacturing : Furniture $2391198
Manufacturing : Jewelry & Watch Retail $1846751
Construction : Home Improvement & Hardware Retail $1785388
Agriculture : Grocery Retail $1672833
Consumer Services : Automobile Dealers $1554750
Retail :

In [68]:
# List comprehension: gather every total revenue into a NumPy array
industry_revenues = np.array([model.total_revenue for model in industries])

# The same collection built the long way, one append per record
industry_revenues_long_way = []
for model in industries:
    industry_revenues_long_way.append(model.total_revenue)

# Name the summary statistics, then print the array followed by each of them
mean_revenue = np.mean(industry_revenues)
median_revenue = np.median(industry_revenues)
print(industry_revenues)
print(mean_revenue)
print(median_revenue)

[86153757 56803298 54269326 43550263 20639533 17827120 15383793  8162494
  5895492  5630498  5298149  4464523  3342220  3189395  2962846  2590605
  2391198  1846751  1785388  1672833  1554750  1228061  1125772   735383
   728379   544569   430870]
12970639.481481481
3189395.0


In [69]:
# Calculate the average revenue of industries with an even amount of revenue
even_revenues = [
    model.total_revenue
    for model in industries
    if model.total_revenue % 2 == 0
]
count = len(even_revenues)
# Guard the division: with no even revenues, dividing by count would raise
# ZeroDivisionError. NaN marks "no data" instead of crashing the cell.
total = sum(even_revenues) / count if count else float("nan")
print(total)

12475482.461538462


In [70]:
# Collect the distinct industry names with a set comprehension
unique_industry_types = {model.industry for model in industries}
print(unique_industry_types)

{'Retail', 'Agriculture', 'Government', 'Construction', 'Manufacturing', 'Consumer Services', 'Business Services'}


In [71]:
# Map each industry type to the list of subindustries recorded under it.
# Keys follow the iteration order of unique_industry_types; each list keeps
# the order in which the records appear in industries.
industry_map = {
    kind: [entry.subindustry for entry in industries if entry.industry == kind]
    for kind in unique_industry_types
}
print(industry_map)


{'Retail': ['Automobile Dealers', 'Grocery Retail', 'Department Stores, Shopping Centers & Superstores', 'Apparel & Accessories Retail', 'Flowers, Gifts & Specialty Stores', 'Home Improvement & Hardware Retail', 'Vitamins, Supplements & Health Stores', 'Pet Products', 'Sporting & Recreational Equipment Retail', 'Furniture', 'Convenience Stores, Gas Stations & Liquor Stores', 'Jewelry & Watch Retail', ''], 'Agriculture': ['Grocery Retail'], 'Government': ['Grocery Retail'], 'Construction': ['Grocery Retail', 'Home Improvement & Hardware Retail', 'Automobile Dealers'], 'Manufacturing': ['Consumer Electronics & Computers Retail', 'Automobile Dealers', 'Toys & Games', 'Flowers, Gifts & Specialty Stores', 'Furniture', 'Jewelry & Watch Retail', 'Home Improvement & Hardware Retail'], 'Consumer Services': ['Automobile Dealers'], 'Business Services': ['Automobile Dealers']}


In [84]:
# Goal : Calculate the average revenue for each industry
# For every industry type, average the revenues of its records and round
# the result to two decimal places.
industry_revenue_map = {
    industry_type: np.round(
        np.average(
            np.array([
                entry.total_revenue
                for entry in industries
                if entry.industry == industry_type
            ])
        ),
        2,
    )
    for industry_type in unique_industry_types
}
print(industry_revenue_map)

# The same mapping again, rebuilt with its keys in sorted order
revenue_by_sorted_name = {
    name: industry_revenue_map[name] for name in sorted(industry_revenue_map)
}
print(revenue_by_sorted_name)


{'Retail': 17092926.23, 'Agriculture': 1672833.0, 'Government': 728379.0, 'Construction': 1726368.0, 'Manufacturing': 16342805.14, 'Consumer Services': 1554750.0, 'Business Services': 4464523.0}
{'Agriculture': 1672833.0, 'Business Services': 4464523.0, 'Construction': 1726368.0, 'Consumer Services': 1554750.0, 'Government': 728379.0, 'Manufacturing': 16342805.14, 'Retail': 17092926.23}
