# 4. Calculate Profitability
-------------------
Group 3 , September 24, 2022
1. Gezhi Cheng, 
2. Haowei Lee, 
3. Ziyi Liu, 
4. VS Chaitanya Madduri

> <i>Description: The program in this notebook
- conducts data cleaning on wrds data
- merges company informations with processed wrds data
- calculates profitability and percentage change of profitability on consecutive three years
</i>


<div class="alert alert-block alert-info">
    <b>Tip:</b> # Please run this notebook in the colab . 
</div> 

### Pre requisites: 
1. And add the shortcut of the drive link :https://drive.google.com/drive/folders/1X4UdGsQiHVWSr63FRiz8rwOuWW5Ua8uI?usp=sharing to your personal drive.


Due to the huge files we have used our personal google drive folders to save the files.

Files:
- wrds_data.csv - Financial data downloaded from [WRDS](https://wrds-www.wharton.upenn.edu/login/?next=/pages/get-data/center-research-security-prices-crsp/annual-update/crspcompustat-merged/fundamentals-annual/)
- Company_Details_analysis.csv - Data storing company general informations

### Output files:

Files:
profitability_data.csv - Data storing profitability and percentage change in profitablilty


## Load dependencies

In [1]:
# Mount the program to the folder on Google Drive to get access to other files on cloud
from google.colab import drive
drive.mount('/content/drive')

# import python packages
import numpy as np
import pandas as pd
from typing import List
import warnings

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1. Preprocess financial data from WRDS



### 1.1 Define constants

In [2]:
# Define financial terms as CONSTANTS
GROSS_PROFIT = "Gross Profit (Loss)"
REVENUE = "Revenue - Total"
GROSS_PROFIT_MARGIN = "Gross Profit Margin"
NET_INCOME = "Net Income (Loss)"
NET_PROFIT_MARGIN = "Net Profit Margin"
OPERATING_EXPENSES = "Operating Expenses Total"
OPERATING_PROFIT_MARGIN = "Operating Profit Margin"
ASSETS = "Assets - Total"
EQUITY = "Stockholders Equity - Total"
RETAINED_EARNINGS = "Retained Earnings"
ROA = "ROA"
ROE = "ROE"

# Define tha map for renaming a dataframe
RENAMING_MAP = {
                "CONM": "Company Name",
                "CIK": "CIK Number",
                "ACT": "Current Assets - Total",
                "AT": "Assets - Total",
                "CH": "Cash",
                "INVOFS": "Inventory/Stock - Other",
                "LCT": "Current Liabilities - Total",
                "LT": "Liabilities - Total",
                "SEQ": "Stockholders' Equity - Total",
                "COGS": "Cost of Goods Sold",
                "EPSPI": "Earnings Per Share (Basic) Including Extraordinary Items",
                "EPSPX": "Earnings Per Share (Basic) Excluding Extraordinary Items",
                "GP": "Gross Profit (Loss)",
                "NI": "Net Income (Loss)",
                "OPEPS": "Earnings Per Share from Operations",
                "OPITI": "Operating Income - Total",
                "REVT": "Revenue - Total",
                "SALE": "Sales/Turnover (Net)",
                "OANCF": "Operating Activities Net Cash Flow",
                "UNWCC": "Working Capital (Use) - Increase (Decrease) (Cash Flow)",
                "ACQINVT": "Acquired Assets - Inventory",
                "DT": "Total Debt Including Current",
                "FINCH": "Finance Division - Cash",
                "TEQ": "Stockholders Equity - Total",
                "XOPR": "Operating Expenses Total",
                "RE": "Retained Earnings"
                }

# make sure the key in the renamming map is in lower case
RENAMING_MAP_LOWER = {k.lower(): v for k, v in RENAMING_MAP.items()}


# define the path of the root folder and of the data
DIR_PATH = "/content/drive/MyDrive/SPM_files/"  # take the current folder as the root folder
WRDS_DATA_PATH = "wrds_data.csv"

LIMIT = 3  # define the numbers of rows for data exploration

### 1.2 Inspect WRDS data

In [3]:
# read in the WRDS data
df_wrds = pd.read_csv(DIR_PATH + WRDS_DATA_PATH)

# rename column names with the map defined in Constants part
df_wrds = df_wrds.rename(columns=RENAMING_MAP_LOWER)

# inspect the dimension and the first few records of the wrds data
print(df_wrds.shape)
df_wrds.head(LIMIT)

(733, 37)


Unnamed: 0,GVKEY,datadate,fyear,indfmt,consol,popsrc,datafmt,Company Name,curcd,Acquired Assets - Inventory,...,Operating Income - Total,Retained Earnings,Revenue - Total,Sales/Turnover (Net),Stockholders' Equity - Total,Stockholders Equity - Total,Working Capital (Use) - Increase (Decrease) (Cash Flow),Operating Expenses Total,CIK Number,costat
0,1075,20111231,2011,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,,1382.32,3241.379,3241.379,3821.85,3930.586,,2067.817,764622,A
1,1075,20121231,2012,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,,1510.094,3301.804,3301.804,3972.806,4102.289,,2045.713,764622,A
2,1075,20131231,2013,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,,1707.22,3454.628,3454.628,4194.47,4340.46,,2192.597,764622,A


### 1.3 Calculate finantial indices

In [4]:
# Define functions to calculate financial indices
def get_gross_profit_margin(df):
    df[GROSS_PROFIT_MARGIN] = df[GROSS_PROFIT] / df[REVENUE]
    return df

def get_net_profit_margin(df):
    df[NET_PROFIT_MARGIN] = df[NET_INCOME] / df[REVENUE]
    return df

def get_operating_profit_margin(df):
    df[OPERATING_PROFIT_MARGIN] = (df[GROSS_PROFIT] - df[OPERATING_EXPENSES]) / df[REVENUE]
    return df

def get_roa(df):
    df[ROA] = df[NET_INCOME] / df[ASSETS]
    return df

def get_roe(df):
    df[ROE] = df[NET_INCOME] / (df[EQUITY] + df[RETAINED_EARNINGS])
    return df

In [5]:
# apply the functions define above to get finantial indices
df_wrds.pipe(get_gross_profit_margin)\
       .pipe(get_net_profit_margin)\
       .pipe(get_operating_profit_margin)\
       .pipe(get_roa)\
       .pipe(get_roe)

# inspect the data
df_wrds.head(LIMIT)

Unnamed: 0,GVKEY,datadate,fyear,indfmt,consol,popsrc,datafmt,Company Name,curcd,Acquired Assets - Inventory,...,Stockholders Equity - Total,Working Capital (Use) - Increase (Decrease) (Cash Flow),Operating Expenses Total,CIK Number,costat,Gross Profit Margin,Net Profit Margin,Operating Profit Margin,ROA,ROE
0,1075,20111231,2011,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,3930.586,,2067.817,764622,A,0.362056,0.104731,-0.275887,0.025892,0.063896
1,1075,20121231,2012,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,4102.289,,2045.713,764622,A,0.380426,0.115556,-0.239149,0.028517,0.067982
2,1075,20131231,2013,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,4340.46,,2192.597,764622,A,0.365316,0.117545,-0.269368,0.03006,0.067145


## 2. Preprocess the data of companyies' details 

In [6]:
# Define CONSTANTS
COMPANY_DATA_PATH = "Company_Details_analysis.csv"
TARGET_COLUMNS = ["Index_Key", "Class_number","Std_Indust_Class", 'Company_Name']

# read in data of companies details
df_company = pd.read_csv(DIR_PATH + COMPANY_DATA_PATH)

# keep only useful columns for further processing
df_company = df_company[TARGET_COLUMNS]
df_company.head(LIMIT)

Unnamed: 0,Index_Key,Class_number,Std_Indust_Class,Company_Name
0,755001,4931,ELECTRIC & OTHER SERVICES COMBINED [4931],UNITIL CORP
1,1069533,4923,NATURAL GAS TRANSMISSION & DISTRIBUTION [4923],RGC RESOURCES INC
2,1126956,4924,NATURAL GAS DISTRIBUTION [4924],SPIRE INC


##3. Merge data from WRDS and data of companies' details into one single dataframe

### 3.1 Merge and inspect the merged data

In [7]:
# to avoid irrelevant warning messages
warnings.filterwarnings("ignore")  

# define the column at where to merge two dataframes
MERGE_COLUMN = "CIK Number"

# rename the column of company index for the convenience when merging two data
df_company.rename(columns={"Index_Key": MERGE_COLUMN}, inplace=True)
df_merged = df_wrds.merge(df_company, on =MERGE_COLUMN)

# inspect the data and its dimensions
print(df_merged.shape)
df_merged.head(LIMIT)

(733, 45)


Unnamed: 0,GVKEY,datadate,fyear,indfmt,consol,popsrc,datafmt,Company Name,curcd,Acquired Assets - Inventory,...,CIK Number,costat,Gross Profit Margin,Net Profit Margin,Operating Profit Margin,ROA,ROE,Class_number,Std_Indust_Class,Company_Name
0,1075,20111231,2011,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,764622,A,0.362056,0.104731,-0.275887,0.025892,0.063896,4911,ELECTRIC SERVICES [4911],PINNACLE WEST CAPITAL CORP
1,1075,20121231,2012,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,764622,A,0.380426,0.115556,-0.239149,0.028517,0.067982,4911,ELECTRIC SERVICES [4911],PINNACLE WEST CAPITAL CORP
2,1075,20131231,2013,INDL,C,D,STD,PINNACLE WEST CAPITAL CORP,USD,,...,764622,A,0.365316,0.117545,-0.269368,0.03006,0.067145,4911,ELECTRIC SERVICES [4911],PINNACLE WEST CAPITAL CORP


## 3.2 Define functions and constants

In [8]:
# CONSTANTS
PROFITABILITY = "profitability"
PROFITABILITY_NEXT_1 = "profitability_next_1"
PROFITABILITY_NEXT_2 = "profitability_next_2"
PROFITABILITY_NEXT_3 = "profitability_next_3"
PERCENTAGE_CHANGE_PROFITABILITY_NEXT_1 = "change_next_1"
PERCENTAGE_CHANGE_PROFITABILITY_NEXT_2 = "change_next_2"
PERCENTAGE_CHANGE_PROFITABILITY_NEXT_3 = "change_next_3"

COMPANY = "Company_Key"
COMP_NEXT_1 = "company_next_1"
COMP_NEXT_2 = "company_next_2"
COMP_NEXT_3 = "company_next_3"


RENAMING_MAP_MERGED = {
    "CIK Number": "Company_Key",
    "Company Name": "company",
    "Std_Indust_Class": "industry",
    "fyear": "year",
    "Profitability": "profitability"
    }

financial_columns = [GROSS_PROFIT, REVENUE, GROSS_PROFIT_MARGIN, NET_INCOME, NET_PROFIT_MARGIN, OPERATING_EXPENSES, ASSETS, EQUITY, RETAINED_EARNINGS, ROA, ROE]

KEY_COLUMNS = ["Company_Key", "year", "profitability", "Company_Name"] + financial_columns

In [9]:
# define functions to calculate profitability and percentage changes
def get_profitability(df, ratio_columns:List = [OPERATING_PROFIT_MARGIN, NET_PROFIT_MARGIN, ROA, ROE, GROSS_PROFIT_MARGIN]):
    """
    This function calculates the proftability by taking average on given columns of financial ratios
    input: 
        df: pd.DataFrame
        ratio_columns: the specified columns of financial ratio
    return:
        original dataframe with a new profitability column
    """
    df_ratios = df.copy()[ratio_columns]
    df_ratios[PROFITABILITY] = df_ratios.sum(axis=1) / len(ratio_columns)
    df[PROFITABILITY] = df_ratios[PROFITABILITY]

    return df

def get_key_columns(df, renaming_map: dict, key_col: List):
    """
    This function renames columns of given dataframe and keep only relevant columns
    input: 
        df: pd.DataFrame
        renaming_map: the mapper for renaming
        key_col: the specified columns to keep
    return:
        dataframe with renamed and relevant columns
    """
    df = df.rename(columns=renaming_map)
    df_key_col = df.copy()[key_col]

    return df_key_col

def reorder_df(df, columns = ['Company_Key', 'year']):
    """
    This function reorders the records by given columns. By default, the function
    reorders by columns 'Company_Key' and 'year'
    input: 
        df: pd.DataFrame
        columns: the specified columns to reorder
    return:
        dataframe with reordered recoreds
    """
    
    return df.sort_values(columns)

def get_percentage_change(df, n_next_1:int = -1, n_next_2:int = -2, n_next_3:int = -3):
    """
    n: int - shift by number n of rows
    """

    df[PROFITABILITY_NEXT_1] = df[PROFITABILITY].shift(n_next_1)  # shift by n years
    df[PROFITABILITY_NEXT_2] = df[PROFITABILITY].shift(n_next_2)  # shift by n years
    df[PROFITABILITY_NEXT_3] = df[PROFITABILITY].shift(n_next_3)  # shift by n years

    df[COMP_NEXT_1] = df[COMPANY].shift(n_next_1)  # shift by n years
    df[COMP_NEXT_2] = df[COMPANY].shift(n_next_2)  # shift by n years
    df[COMP_NEXT_3] = df[COMPANY].shift(n_next_3)  # shift by n years

    # df = df.dropna()  # drop NaN after shifting

    df[PERCENTAGE_CHANGE_PROFITABILITY_NEXT_1] = (((df[PROFITABILITY_NEXT_1] - df[PROFITABILITY]) / df[PROFITABILITY] ) * 100).round(2)
    df[PERCENTAGE_CHANGE_PROFITABILITY_NEXT_2] = (((df[PROFITABILITY_NEXT_2] - df[PROFITABILITY]) / df[PROFITABILITY] ) * 100).round(2)
    df[PERCENTAGE_CHANGE_PROFITABILITY_NEXT_3] = (((df[PROFITABILITY_NEXT_3] - df[PROFITABILITY]) / df[PROFITABILITY] ) * 100).round(2)

    df[PROFITABILITY] = df[PROFITABILITY].round(4)
    df[PROFITABILITY_NEXT_1] = df[PROFITABILITY_NEXT_1].round(4)
    df[PROFITABILITY_NEXT_2] = df[PROFITABILITY_NEXT_2].round(4)
    df[PROFITABILITY_NEXT_3] = df[PROFITABILITY_NEXT_3].round(4)

    return df

#### 3.3 Inspect the result of profatibility

In [10]:
# apply the functions to calculate profitability
df_profitability = df_merged.pipe(get_profitability)\
                            .pipe(get_key_columns, RENAMING_MAP_MERGED, KEY_COLUMNS)\
                            .pipe(reorder_df)\
                            .pipe(get_percentage_change)

In [11]:
df_profitability

Unnamed: 0,Company_Key,year,profitability,Company_Name,Gross Profit (Loss),Revenue - Total,Gross Profit Margin,Net Income (Loss),Net Profit Margin,Operating Expenses Total,...,ROE,profitability_next_1,profitability_next_2,profitability_next_3,company_next_1,company_next_2,company_next_3,change_next_1,change_next_2,change_next_3
22,4904,2011,0.0322,AMERICAN ELECTRIC POWER CO INC,4525.000,15116.000,0.299352,1946.000,0.128738,10591.000,...,0.096888,0.0255,0.0272,0.0221,4904.0,4904.0,4904.0,-20.91,-15.54,-31.33
23,4904,2012,0.0255,AMERICAN ELECTRIC POWER CO INC,4784.000,14945.000,0.320107,1259.000,0.084242,10161.000,...,0.059567,0.0272,0.0221,0.0420,4904.0,4904.0,4904.0,6.79,-13.17,65.03
24,4904,2013,0.0272,AMERICAN ELECTRIC POWER CO INC,4854.000,15357.000,0.316077,1480.000,0.096373,10503.000,...,0.065092,0.0221,0.0420,0.0134,4904.0,4904.0,4904.0,-18.69,54.54,-50.69
25,4904,2014,0.0221,AMERICAN ELECTRIC POWER CO INC,5216.000,17020.000,0.306463,1634.000,0.096005,11804.000,...,0.067725,0.0420,0.0134,0.0561,4904.0,4904.0,4904.0,90.07,-39.36,153.76
26,4904,2015,0.0420,AMERICAN ELECTRIC POWER CO INC,5343.200,16453.200,0.324751,2047.100,0.124420,11110.000,...,0.078205,0.0134,0.0561,0.0291,4904.0,4904.0,4904.0,-68.09,33.51,-30.70
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17,1783400,2017,0.0507,"US ECOLOGY, INC.",194.343,504.042,0.385569,49.365,0.097938,394.165,...,0.104919,0.0394,0.0118,-0.3364,1783400.0,1783400.0,1783400.0,-22.37,-76.80,-763.40
18,1783400,2018,0.0394,"US ECOLOGY, INC.",212.653,565.928,0.375760,49.595,0.087635,445.615,...,0.092744,0.0118,-0.3364,-0.0268,1783400.0,1783400.0,1783400.0,-70.12,-954.59,-168.03
19,1783400,2019,0.0118,"US ECOLOGY, INC.",264.136,685.509,0.385314,33.140,0.048344,550.113,...,0.027477,-0.3364,-0.0268,,1783400.0,1783400.0,,-2959.85,-327.67,
20,1783400,2020,-0.3364,"US ECOLOGY, INC.",352.954,933.854,0.377954,-389.359,-0.416938,776.990,...,-0.976275,-0.0268,,,1783400.0,,,-92.04,,


In [12]:
# specift the order of the columns
COLUMNS_ORDER = ["year", "Company_Key", 
                 "Company_Name",  "company_next_1","company_next_2", "company_next_3",
                 "profitability", "profitability_next_1", "profitability_next_2", "profitability_next_3",
                 "change_next_1", "change_next_2", "change_next_3"] + financial_columns

# reorder the dataframe        
df_profitability = df_profitability[COLUMNS_ORDER]
df_profitability

Unnamed: 0,year,Company_Key,Company_Name,company_next_1,company_next_2,company_next_3,profitability,profitability_next_1,profitability_next_2,profitability_next_3,...,Revenue - Total,Gross Profit Margin,Net Income (Loss),Net Profit Margin,Operating Expenses Total,Assets - Total,Stockholders Equity - Total,Retained Earnings,ROA,ROE
22,2011,4904,AMERICAN ELECTRIC POWER CO INC,4904.0,4904.0,4904.0,0.0322,0.0255,0.0272,0.0221,...,15116.000,0.299352,1946.000,0.128738,10591.000,52223.000,14665.000,5420.000,0.037263,0.096888
23,2012,4904,AMERICAN ELECTRIC POWER CO INC,4904.0,4904.0,4904.0,0.0255,0.0272,0.0221,0.0420,...,14945.000,0.320107,1259.000,0.084242,10161.000,54367.000,15237.000,5899.000,0.023157,0.059567
24,2013,4904,AMERICAN ELECTRIC POWER CO INC,4904.0,4904.0,4904.0,0.0272,0.0221,0.0420,0.0134,...,15357.000,0.316077,1480.000,0.096373,10503.000,56414.000,16086.000,6651.000,0.026235,0.065092
25,2014,4904,AMERICAN ELECTRIC POWER CO INC,4904.0,4904.0,4904.0,0.0221,0.0420,0.0134,0.0561,...,17020.000,0.306463,1634.000,0.096005,11804.000,59633.000,16824.000,7303.000,0.027401,0.067725
26,2015,4904,AMERICAN ELECTRIC POWER CO INC,4904.0,4904.0,4904.0,0.0420,0.0134,0.0561,0.0291,...,16453.200,0.324751,2047.100,0.124420,11110.000,61683.100,17904.900,8271.200,0.033187,0.078205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17,2017,1783400,"US ECOLOGY, INC.",1783400.0,1783400.0,1783400.0,0.0507,0.0394,0.0118,-0.3364,...,504.042,0.385569,49.365,0.097938,394.165,802.076,324.077,146.429,0.061547,0.104919
18,2018,1783400,"US ECOLOGY, INC.",1783400.0,1783400.0,1783400.0,0.0394,0.0118,-0.3364,-0.0268,...,565.928,0.375760,49.595,0.087635,445.615,947.898,359.217,175.533,0.052321,0.092744
19,2019,1783400,"US ECOLOGY, INC.",1783400.0,1783400.0,,0.0118,-0.3364,-0.0268,,...,685.509,0.385314,33.140,0.048344,550.113,2231.244,1011.380,194.720,0.014853,0.027477
20,2020,1783400,"US ECOLOGY, INC.",1783400.0,,,-0.3364,-0.0268,,,...,933.854,0.377954,-389.359,-0.416938,776.990,1831.283,601.931,-203.110,-0.212615,-0.976275


In [13]:
def remove_missing_records(df):
    # define the filters to clear the records that mix up companies after shifing the columns
    filter_diff_company = (df[COMPANY] != df[COMP_NEXT_1]) | \
                        (df[COMPANY] != df[COMP_NEXT_2]) | \
                        (df[COMPANY] != df[COMP_NEXT_3])

    # df_profitability[filter_diff_company]
    df[PROFITABILITY][filter_diff_company] = np.NaN
    df.dropna(inplace=True)
    df = df.drop(columns=[COMP_NEXT_1, COMP_NEXT_2, COMP_NEXT_3])

    return df

In [14]:
# apply the function and inspect the result
df_profitability = df_profitability.pipe(remove_missing_records)

In [15]:
df_profitability

Unnamed: 0,year,Company_Key,Company_Name,profitability,profitability_next_1,profitability_next_2,profitability_next_3,change_next_1,change_next_2,change_next_3,...,Revenue - Total,Gross Profit Margin,Net Income (Loss),Net Profit Margin,Operating Expenses Total,Assets - Total,Stockholders Equity - Total,Retained Earnings,ROA,ROE
22,2011,4904,AMERICAN ELECTRIC POWER CO INC,0.0322,0.0255,0.0272,0.0221,-20.91,-15.54,-31.33,...,15116.000,0.299352,1946.000,0.128738,10591.000,52223.000,14665.000,5420.000,0.037263,0.096888
23,2012,4904,AMERICAN ELECTRIC POWER CO INC,0.0255,0.0272,0.0221,0.0420,6.79,-13.17,65.03,...,14945.000,0.320107,1259.000,0.084242,10161.000,54367.000,15237.000,5899.000,0.023157,0.059567
24,2013,4904,AMERICAN ELECTRIC POWER CO INC,0.0272,0.0221,0.0420,0.0134,-18.69,54.54,-50.69,...,15357.000,0.316077,1480.000,0.096373,10503.000,56414.000,16086.000,6651.000,0.026235,0.065092
25,2014,4904,AMERICAN ELECTRIC POWER CO INC,0.0221,0.0420,0.0134,0.0561,90.07,-39.36,153.76,...,17020.000,0.306463,1634.000,0.096005,11804.000,59633.000,16824.000,7303.000,0.027401,0.067725
26,2015,4904,AMERICAN ELECTRIC POWER CO INC,0.0420,0.0134,0.0561,0.0291,-68.09,33.51,-30.70,...,16453.200,0.324751,2047.100,0.124420,11110.000,61683.100,17904.900,8271.200,0.033187,0.078205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14,2014,1783400,"US ECOLOGY, INC.",0.0610,0.0285,0.0438,0.0507,-53.30,-28.14,-16.83,...,447.411,0.404688,38.236,0.085461,333.339,919.855,251.337,85.615,0.041567,0.113476
15,2015,1783400,"US ECOLOGY, INC.",0.0285,0.0438,0.0507,0.0394,53.87,78.09,38.25,...,563.070,0.384023,25.611,0.045485,438.717,771.987,256.135,86.234,0.033175,0.074805
16,2016,1783400,"US ECOLOGY, INC.",0.0438,0.0507,0.0394,0.0118,15.74,-10.15,-73.15,...,477.665,0.392382,34.252,0.071707,367.804,776.400,280.024,107.154,0.044116,0.088466
17,2017,1783400,"US ECOLOGY, INC.",0.0507,0.0394,0.0118,-0.3364,-22.37,-76.80,-763.40,...,504.042,0.385569,49.365,0.097938,394.165,802.076,324.077,146.429,0.061547,0.104919


In [19]:
filter1 = df_profitability[REVENUE].between(200, 800)
filter_negetive_profit =  df_profitability[PROFITABILITY] < 0
filter_positive_profit =  df_profitability[PROFITABILITY] > 0

result_negetive_profit = df_profitability[filter1 & filter_negetive_profit]
result_negetive_profit[['Company_Name', 'Company_Key', REVENUE, PROFITABILITY]]['Company_Name'].unique()

array(['SOUTH JERSEY INDUSTRIES INC', 'UNITIL CORP',
       'CALIFORNIA WATER SERVICE GROUP', 'ORMAT TECHNOLOGIES, INC.',
       'CLEAN ENERGY FUELS CORP.', 'ATLANTIC POWER CORP',
       'GENIE ENERGY LTD.', 'CHARAH SOLUTIONS, INC.',
       'NORTHWEST NATURAL HOLDING CO'], dtype=object)

In [21]:
result_positive_profit = df_profitability[filter1 & filter_positive_profit]
result_positive_profit[['Company_Name', 'Company_Key', REVENUE, PROFITABILITY]]['Company_Name'].unique()

array(['ESSENTIAL UTILITIES, INC.', 'UNITIL CORP',
       'CALIFORNIA WATER SERVICE GROUP', 'MGE ENERGY INC',
       'ORMAT TECHNOLOGIES, INC.', 'ITC HOLDINGS CORP.',
       'ATLANTIC POWER CORP', 'PATTERN ENERGY GROUP INC.',
       'CLEARWAY ENERGY, INC.', 'TERRAFORM POWER, INC.',
       'NORTHWEST NATURAL HOLDING CO', 'US ECOLOGY, INC.'], dtype=object)

### Export data as a CSV file

In [None]:
EXPORTED_NAME = "profitability_data.csv"
df_profitability.to_csv(DIR_PATH + EXPORTED_NAME, index=False)

In [None]:
# test if the exported data work by reading in the data
# only uncomment if needed
# df_readin = pd.read_csv(DIR_PATH + EXPORTED_NAME)
# print({"rows": df_readin.shape[0], "columns": df_readin.shape[1]})
# df_readin

{'rows': 506, 'columns': 10}


Unnamed: 0,year,Company_Key,Company_Name,profitability,profitability_next_1,profitability_next_2,profitability_next_3,change_next_1,change_next_2,change_next_3
0,2011,4904,AMERICAN ELECTRIC POWER CO INC,0.0322,0.0255,0.0272,0.0221,-20.91,-15.54,-31.33
1,2012,4904,AMERICAN ELECTRIC POWER CO INC,0.0255,0.0272,0.0221,0.0420,6.79,-13.17,65.03
2,2013,4904,AMERICAN ELECTRIC POWER CO INC,0.0272,0.0221,0.0420,0.0134,-18.69,54.54,-50.69
3,2014,4904,AMERICAN ELECTRIC POWER CO INC,0.0221,0.0420,0.0134,0.0561,90.07,-39.36,153.76
4,2015,4904,AMERICAN ELECTRIC POWER CO INC,0.0420,0.0134,0.0561,0.0291,-68.09,33.51,-30.70
...,...,...,...,...,...,...,...,...,...,...
501,2014,1783400,"US ECOLOGY, INC.",0.0610,0.0285,0.0438,0.0507,-53.30,-28.14,-16.83
502,2015,1783400,"US ECOLOGY, INC.",0.0285,0.0438,0.0507,0.0394,53.87,78.09,38.25
503,2016,1783400,"US ECOLOGY, INC.",0.0438,0.0507,0.0394,0.0118,15.74,-10.15,-73.15
504,2017,1783400,"US ECOLOGY, INC.",0.0507,0.0394,0.0118,-0.3364,-22.37,-76.80,-763.40


## End of this notebook