### Purpose - Take the Weights and Characteristics reports from FactSet and convert them into a Seismic-friendly flat-file format.

In [2]:
# Import dependencies
import numpy as np
import pandas as pd
import xlsxwriter
import xlwings as xw 
from xlwings import Range, constants
import os

### Step 1:  Wrangle and transform `Sector` data from FactSet

In [3]:
# Build the path to the input workbook: <current working directory>/input/input_template.xlsx.
# The components are passed to os.path.join separately so the platform's
# separator is used, instead of joining a single pre-concatenated string.
cwd = os.getcwd()
input_path = os.path.join(cwd, "input", "input_template.xlsx")

In [4]:
# Columns holding the portfolio and index weights on the sector tab
sector_weight_cols = ['SimPortWeight', 'IndexWeight']

# Import sector weights tab and create df (the first 7 sheet rows are skipped)
sector = pd.read_excel(
    input_path,
    sheet_name='SECTOR WEIGHTS',
    skiprows=7,
    header=None,
    names=['Sector'] + sector_weight_cols,
)

# Drop blank rows, then the Total row. The explicit .copy() makes the result
# an independent frame, so the column assignment below does not write to a
# filtered slice of the original (which pandas flags with SettingWithCopyWarning).
sector = sector.dropna(axis=0)
sector = sector[sector['Sector'] != 'Total'].copy()

# Divide weights by 100 (the export step later applies a '0.00%' number format)
sector[sector_weight_cols] = sector[sector_weight_cols].div(100)

# Import the same tab again without skipping rows to get the additional metadata
# stored above the data rows
df_metadata = pd.read_excel(input_path, sheet_name='SECTOR WEIGHTS')

# Get report date
rpt_date = df_metadata.iloc[4, 0]

# Get report currency
rpt_curr = df_metadata.iloc[0, 0]

# Get strategy code: first whitespace-delimited token of the first column
# header, with any '_SUFFIX' (such as EVE) stripped off
full_rpt_code = df_metadata.columns[0].split()[0]
rpt_code = full_rpt_code.split('_')[0]

# Insert additional cols needed for Seismic
sector.insert(0, 'AsOfDate', rpt_date)
sector.insert(1, 'StrategyCode', rpt_code)

# Preview df and metadata
print('Date: ', rpt_date, " ", "Date Type: ", type(rpt_date), ' Strategy Code: ', rpt_code,
      ' Currency: ', rpt_curr)
sector.head(13)

# Use 'filepath' below if want to make sharable with IB and MM, can use PA directory for input and output.
#filepath 'M:/Characteristics/Flat File/Template/Flat_File_Template.xlsx'

Date:  12/31/2022   Date Type:  <class 'str'>  Strategy Code:  MAWIE  Currency:  USD


Unnamed: 0,AsOfDate,StrategyCode,Sector,SimPortWeight,IndexWeight
0,12/31/2022,MAWIE,Communication Services,0.070205,0.067679
1,12/31/2022,MAWIE,Consumer Discretionary,0.098046,0.104468
2,12/31/2022,MAWIE,Consumer Staples,0.067496,0.077455
3,12/31/2022,MAWIE,Energy,0.062314,0.055754
4,12/31/2022,MAWIE,Financials,0.146197,0.151904
5,12/31/2022,MAWIE,Health Care,0.1281,0.133686
6,12/31/2022,MAWIE,Industrials,0.100026,0.10159
7,12/31/2022,MAWIE,Information Technology,0.218714,0.19993
8,12/31/2022,MAWIE,Materials,0.052759,0.049761
9,12/31/2022,MAWIE,Real Estate,0.020211,0.025876


### Step 2:  Wrangle and transform `Market Cap` data from FactSet

In [5]:
# Import mcap weights tab and create df (columns A:C, first 7 sheet rows skipped)
mcap = pd.read_excel(
    input_path,
    sheet_name='MARKET CAP WEIGHTS',
    header=None,
    usecols="A:C",
    skiprows=7,
    names=('Label', 'SimWeight', 'IndexWeight'),
)

# Insert additional cols needed for Seismic
mcap.insert(0, 'AsOfDate', rpt_date)
mcap.insert(1, 'StrategyCode', rpt_code)
mcap.insert(2, 'Currency', rpt_curr)

# Drop blank rows, then the Total row. The explicit .copy() gives an
# independent frame so the insert/assignment below do not operate on a
# filtered slice (avoids pandas' SettingWithCopyWarning).
mcap = mcap.dropna(axis=0)
mcap = mcap[mcap['Label'] != 'Total'].copy()

# Insert a 'Range' col numbering the buckets 1..len(mcap) in row order.
# np.arange(1, n + 1) yields 1, 2, ..., n; the array is passed directly rather
# than stored in a variable called `range`, which would shadow the builtin
# for every later cell in the kernel.
mcap.insert(3, 'Range', np.arange(1, len(mcap) + 1))

# Divide weights by 100
mcap[['SimWeight', 'IndexWeight']] = mcap[['SimWeight', 'IndexWeight']].div(100)
mcap

# Use 'filepath' below if want to make sharable with IB and MM, can use PA directory for input and output.
#filepath 'M:/Characteristics/Flat File/Template/Flat_File_Template.xlsx'

Unnamed: 0,AsOfDate,StrategyCode,Currency,Range,Label,SimWeight,IndexWeight
0,12/31/2022,MAWIE,USD,1,Over 500B,0.119581,0.106749
1,12/31/2022,MAWIE,USD,2,100B - 500B,0.297575,0.342018
2,12/31/2022,MAWIE,USD,3,25B - 100B,0.306167,0.329125
3,12/31/2022,MAWIE,USD,4,Under 25B,0.276677,0.222077
4,12/31/2022,MAWIE,USD,5,[N/A],0.0,3.1e-05


In [6]:
# Currency symbols/prefixes used when labelling the market cap buckets
import unicodedata

EUR_Code = unicodedata.lookup("EURO SIGN")
GBP_Code = unicodedata.lookup('POUND SIGN')
NZD_Code, DKK_Code, USD_Code, AUD_Code = 'NZD', 'DKK ', '$', 'A$'

# Report currency code -> symbol/prefix used in the output
curr_dict = dict(USD=USD_Code, GBP=GBP_Code, DKK=DKK_Code, NZD=NZD_Code, AUD=AUD_Code, EUR=EUR_Code)
curr = curr_dict[rpt_curr]

# DKK reports use their own bucket boundaries; every other currency shares one set
is_dkk = curr == 'DKK '

if is_dkk:
    bucket_templates = ('> {0}3000B', '{0}500B - {0}3000B', '{0}150B - {0}500B', '< {0}150B')
else:
    bucket_templates = ('> {0}500B', '{0}100B - {0}500B', '{0}25B - {0}100B', '< {0}25B')

# Fill the currency symbol into each bucket label
Mega_Cap, Big_Large_Cap, Large_Cap, Small_Cap = (
    template.format(curr) for template in bucket_templates
)

# Original bucket label -> equivalent label carrying the currency symbol
dkk_replace = {
    'Over 3000B': Mega_Cap,
    '500B - 3000B': Big_Large_Cap,
    '150B - 500B': Large_Cap,
    'Under 150B': Small_Cap,
}
usd_replace = {
    'Over 500B': Mega_Cap,
    '100B - 500B': Big_Large_Cap,
    '25B - 100B': Large_Cap,
    'Under 25B': Small_Cap,
}

# Labels with no entry in the chosen mapping are left unchanged by replace()
mcap['Label'] = mcap['Label'].replace(dkk_replace if is_dkk else usd_replace)

# Preview final mkt cap df
mcap

Unnamed: 0,AsOfDate,StrategyCode,Currency,Range,Label,SimWeight,IndexWeight
0,12/31/2022,MAWIE,USD,1,> $500B,0.119581,0.106749
1,12/31/2022,MAWIE,USD,2,$100B - $500B,0.297575,0.342018
2,12/31/2022,MAWIE,USD,3,$25B - $100B,0.306167,0.329125
3,12/31/2022,MAWIE,USD,4,< $25B,0.276677,0.222077
4,12/31/2022,MAWIE,USD,5,[N/A],0.0,3.1e-05


### Step 3:  Wrangle and transform `Characteristics` data from FactSet 

In [23]:
# Read the characteristics tab (first 8 sheet rows skipped, no header row)
chars = pd.read_excel(
    input_path,
    sheet_name='CHARACTERISTICS',
    skiprows=8,
    header=None,
    names=['Char', 'PortValue', 'IndValue'],
)

# Leading columns required by Seismic, inserted at positions 0, 1, 2
seismic_cols = [('AsOfDate', rpt_date), ('StrategyCode', rpt_code), ('Currency', rpt_curr)]
for position, (col_name, col_value) in enumerate(seismic_cols):
    chars.insert(position, col_name, col_value)

# FactSet characteristic name -> name wanted in Seismic
chars_updates = {
    '# of Securities': 'Number of Securities',
    'P/E (1Yr Trailing)': 'Price/Earnings Ratio (LTM)',
    'INTECH Dividend Yield': 'Dividend Yield',
    'INTECH Growth Rate (5 Year Trailing)': 'Growth Rate (5 Year Trailing)',
    'P/B': 'Price/Book Ratio',
    'Market Cap - Weighted Median': 'Weighted Median Market Cap',
    'Market Cap - Weighted Average': 'Weighted Average Market Cap',
    'Overall Industry-Adjusted Company Score': 'Overall ESG Score',
    'Carbon Emissions - Scope 1+2 Intensity': 'Scope 1 + 2 Carbon Intensity',
}

# Apply the renames; names without an entry are kept as they are
chars['Char'] = chars['Char'].replace(chars_updates)

# Remove the rows labelled 8, 9 and 10
# NOTE(review): these labels are positional in the FactSet layout - confirm they
# still point at the unwanted rows if the report template changes.
chars = chars.drop(index=[8, 9, 10])

# Preview
chars

# Sharable with IB and MM, can use PA directory for input and output.
#filepath 'M:/Characteristics/Flat File/Template/Flat_File_Template.xlsx'

Unnamed: 0,AsOfDate,StrategyCode,Currency,Char,PortValue,IndValue
0,12/31/2022,MAWIE,USD,Number of Securities,1443.0,2884.0
1,12/31/2022,MAWIE,USD,Price/Earnings Ratio (LTM),13.199213,14.209011
2,12/31/2022,MAWIE,USD,Dividend Yield,2.559034,2.390499
3,12/31/2022,MAWIE,USD,Growth Rate (5 Year Trailing),16.548676,15.627927
4,12/31/2022,MAWIE,USD,Price/Book Ratio,2.433131,2.411241
5,12/31/2022,MAWIE,USD,Weighted Average Market Cap,266454.987522,256929.880593
6,12/31/2022,MAWIE,USD,Weighted Median Market Cap,56306.488281,70771.632812
7,12/31/2022,MAWIE,USD,Overall ESG Score,6.893209,6.786239
11,12/31/2022,MAWIE,USD,Scope 1 + 2 Carbon Intensity,142.155834,157.754791


In [24]:
# Transform numerical output to appropriate type

def MCap(item, symbol=None):
    """Format a market cap value as '<symbol><value> B'.

    The value is divided by 1000 and rounded to one decimal place before
    being converted to text, e.g. ``MCap(266454.987522, '$')`` gives
    ``'$266.5 B'``.

    Parameters
    ----------
    item : number
        Market cap value to format.
        NOTE(review): presumably reported in millions, so that /1000 yields
        billions - confirm against the FactSet report.
    symbol : str, optional
        Currency symbol/prefix. When omitted, the notebook-level ``curr`` is
        used, which keeps existing one-argument calls working unchanged.

    Returns
    -------
    str
    """
    if symbol is None:
        # Fall back to the currency symbol chosen earlier in the notebook
        symbol = curr
    scaled = round(item / 1000, 1)
    return symbol + str(scaled) + ' B'

# Rows labelled 5 and 6 (the weighted average / weighted median market cap rows
# in the preview above) get the currency-prefixed format in both value columns.
for value_col in ('PortValue', 'IndValue'):
    for row_label in (5, 6):
        chars.loc[row_label, value_col] = MCap(chars.loc[row_label, value_col])

def PercString(item):
    """Return ``item`` as text with two decimal places and a trailing '%'."""
    return f"{item:0.2f}%"

# Rows labelled 2 and 3 (dividend yield and growth rate in the preview above)
# are shown as percentages in both value columns.
for row_label in (2, 3):
    for value_col in ('PortValue', 'IndValue'):
        chars.loc[row_label, value_col] = PercString(chars.loc[row_label, value_col])

def RoundString(item):
    """Return ``item`` as text with exactly two decimal places."""
    return f"{item:0.2f}"

# Rows labelled 1, 4 and 7 (P/E, P/B and ESG score in the preview above)
# are shown with two decimals in both value columns.
for row_label in (1, 4, 7):
    for value_col in ('PortValue', 'IndValue'):
        chars.loc[row_label, value_col] = RoundString(chars.loc[row_label, value_col])

def RoundInt(item):
    """Return ``item`` as text with no decimal places."""
    return f"{item:0.0f}"

# Row labelled 11 (carbon intensity in the preview above) is shown as a whole number
for value_col in ('PortValue', 'IndValue'):
    chars.loc[11, value_col] = RoundInt(chars.loc[11, value_col])

chars

Unnamed: 0,AsOfDate,StrategyCode,Currency,Char,PortValue,IndValue
0,12/31/2022,MAWIE,USD,Number of Securities,1443.0,2884.0
1,12/31/2022,MAWIE,USD,Price/Earnings Ratio (LTM),13.20,14.21
2,12/31/2022,MAWIE,USD,Dividend Yield,2.56%,2.39%
3,12/31/2022,MAWIE,USD,Growth Rate (5 Year Trailing),16.55%,15.63%
4,12/31/2022,MAWIE,USD,Price/Book Ratio,2.43,2.41
5,12/31/2022,MAWIE,USD,Weighted Average Market Cap,$266.5 B,$256.9 B
6,12/31/2022,MAWIE,USD,Weighted Median Market Cap,$56.3 B,$70.8 B
7,12/31/2022,MAWIE,USD,Overall ESG Score,6.89,6.79
11,12/31/2022,MAWIE,USD,Scope 1 + 2 Carbon Intensity,142,158


### Step 4:  Wrangle and transform `Top Bottom 10` data from FactSet

In [8]:
# Import Top 10 Portfolio Holdings by Weight tab and create df
# (first 8 sheet rows and the last row are skipped)
tb = pd.read_excel(
    input_path,
    sheet_name='TOP 10 WEIGHTS',
    header=None,
    skiprows=8,
    skipfooter=1,
    names=['Holding', 'Weight'],
)

# Drop blank rows, then the Total row. The explicit .copy() gives an
# independent frame so the column assignment further down does not write to a
# filtered slice (avoids pandas' SettingWithCopyWarning).
tb = tb.dropna(axis=0)
tb = tb[tb['Holding'] != 'Total'].copy()

# Sum of the weights of the remaining holdings
ten_wgt_sum = tb['Weight'].sum()

# Insert additional columns needed for Seismic
tb.insert(0, 'AsOfDate', rpt_date)
tb.insert(1, 'StrategyCode', rpt_code)
tb.insert(4, 'WeightSum', ten_wgt_sum)

# Divide weights by 100
tb[['Weight', 'WeightSum']] = tb[['Weight', 'WeightSum']].div(100)

# Order alphabetically by holding name (reassigned rather than sorted in place)
tb = tb.sort_values(by='Holding')

tb

# Use 'filepath' below if want to make sharable with IB and MM, can use PA directory for input and output.
#filepath 'M:/Characteristics/Flat File/Template/Flat_File_Template.xlsx'

Unnamed: 0,AsOfDate,StrategyCode,Holding,Weight,WeightSum
7,12/31/2022,MAWIE,"AbbVie, Inc.",0.008518,0.160169
3,12/31/2022,MAWIE,Alphabet Inc. Class A,0.014451,0.160169
4,12/31/2022,MAWIE,Alphabet Inc. Class C,0.011761,0.160169
2,12/31/2022,MAWIE,"Amazon.com, Inc.",0.016126,0.160169
0,12/31/2022,MAWIE,Apple Inc.,0.046316,0.160169
8,12/31/2022,MAWIE,Broadcom Inc.,0.00848,0.160169
1,12/31/2022,MAWIE,Microsoft Corporation,0.027493,0.160169
9,12/31/2022,MAWIE,"Taiwan Semiconductor Manufacturing Co., Ltd.",0.008077,0.160169
6,12/31/2022,MAWIE,"Tesla, Inc.",0.008623,0.160169
5,12/31/2022,MAWIE,UnitedHealth Group Incorporated,0.010324,0.160169


### Step 5:  Output each table to its own Excel file

In [9]:
# Output key -> dataframe; each dataframe is written to its own workbook
df_dict = {'sector': sector, 'mcap': mcap, 'chars': chars, 'tb': tb}

# Root folder for the generated workbooks.
# NOTE(review): user-specific absolute path - consider deriving it from `cwd`
# (as the input path is) so the notebook runs on other machines.
path_one = 'C:/Users/eardito/Python Files/INTECH Projects/ETL_ffChars/output/'

# Month number (no leading zero) -> abbreviation used in the output folder name
month_dict = {'1': 'Jan', '2': 'Feb', '3': 'Mar', '4': 'Apr', '5': 'May', '6': 'Jun',
              '7': 'Jul', '8': 'Aug', '9': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'}


def extract_date(date_string):
    """Convert an 'M/D/YYYY' string into a 'MonYY' tag, e.g. '12/31/2022' -> 'Dec22'.

    The month is normalised through int() so that zero-padded months
    ('06/30/2022') resolve to the same key as unpadded ones ('6/30/2022').
    """
    parts = date_string.split('/')
    month_str = month_dict[str(int(parts[0]))]
    year = parts[2][2:]
    return f"{month_str}{year}"


def _format_sector(sht):
    """Column widths, bold header, percent format and centring for the sector sheet."""
    sht.range('a:b').api.ColumnWidth = 15.86
    sht.range('c:c').api.ColumnWidth = 25.86
    sht.range('d:e').api.ColumnWidth = 15.86
    sht.range('a1:g1').api.Font.Bold = True
    sht.range('d2:e13').number_format = '0.00%'
    sht.range('d:e').api.HorizontalAlignment = constants.HAlign.xlHAlignCenter


def _format_tb(sht):
    """Column widths, bold header and percent format for the top holdings sheet."""
    sht.range('a:b').api.ColumnWidth = 12.43
    sht.range('c:c').api.ColumnWidth = 44.89
    sht.range('d:e').api.ColumnWidth = 13.71
    sht.range('a1:g1').api.Font.Bold = True
    sht.range('d2:e11').number_format = '0.00%'


def _format_chars(sht):
    """Column widths and bold header for the characteristics sheet."""
    sht.range('a:c').api.ColumnWidth = 14.00
    sht.range('d:d').api.ColumnWidth = 31.71
    sht.range('e:f').api.ColumnWidth = 14.29
    sht.range('a1:g1').api.Font.Bold = True


def _format_mcap(sht):
    """Column widths, bold header, centring and percent format for the market cap sheet."""
    sht.range('a:c').api.ColumnWidth = 15.86
    sht.range('d:d').api.ColumnWidth = 8.43
    sht.range('e:g').api.ColumnWidth = 15.86
    sht.range('a1:g1').api.Font.Bold = True
    sht.range('d1:d6').api.HorizontalAlignment = constants.HAlign.xlHAlignCenter
    sht.range('f2:g7').number_format = '0.00%'


# Output key -> (file name, function that formats the populated sheet).
# A key missing from this table fails immediately with a KeyError instead of
# reaching the save step with no file name assigned.
sheet_specs = {
    'sector': ('SectorWeightsSim.xlsx', _format_sector),
    'tb': ('HoldingsIndSim.xlsx', _format_tb),
    'chars': ('CharacteristicsSim.xlsx', _format_chars),
    'mcap': ('MarketCapSim.xlsx', _format_mcap),
}

# The destination folder is the same for every workbook, so build and create it once
date_use = extract_date(rpt_date)
path_two = f"{rpt_code}_{date_use}/"
fin_path = os.path.join(path_one, path_two)
os.makedirs(fin_path, exist_ok=True)

for k, v in df_dict.items():
    file_name, format_sheet = sheet_specs[k]

    wb = xw.Book()
    try:
        sht = wb.sheets.add('SeismicList')
        sht.range('A1').options(pd.DataFrame, index=False).value = v
        format_sheet(sht)

        # The new workbook comes with an extraneous default sheet ('Sheet1').
        # Collect the matching names first and delete afterwards, so the sheet
        # collection is not modified while it is being iterated.
        default_sheet_names = [sheet.name for sheet in wb.sheets if 'Sheet' in sheet.name]
        for sheet_name in default_sheet_names:
            wb.sheets[sheet_name].delete()

        full_path = fin_path + file_name
        wb.save(full_path)
    finally:
        # Always close the workbook, even if formatting or saving failed,
        # so no stray workbook is left open in Excel.
        wb.close()

In [10]:


# wb = xw.Book()
# app = xw.apps.active

# # Dict where values are list of df and sheetname, will loop through and create tab and add df data to the tab
# sht_dict = {3: [df, "Sector"], 1: [mcap, "MCap"], 2: [chars, "Chars"], 0: [tb, "TB"]}

# for i in [0,1,2,3]:
#     sht = wb.sheets.add(sht_dict[i][1])
#     sht.range('A1').options(pd.DataFrame, index=False).value = sht_dict[i][0]

# # Loop above was creating extraneous 'Sheet1' which this loop below deletes
# for sheet in wb.sheets:
#     if 'Sheet' in sheet.name: 
#         sheet.delete()

# # Customize the format of each sheet
# sector_sheet = wb.sheets['Sector']
# sector_sheet.range('a:b').api.ColumnWidth = 15.86
# sector_sheet.range('c:c').api.ColumnWidth = 25.86
# sector_sheet.range('d:e').api.ColumnWidth = 15.86
# sector_sheet.range('a1:g1').api.Font.Bold = True
# sector_sheet.range('d2:e13').number_format = '0.00%'
# sector_sheet.range('d:e').api.HorizontalAlignment = constants.HAlign.xlHAlignCenter

# tb_sheet = wb.sheets['TB']
# tb_sheet.range('a:b').api.ColumnWidth = 12.43
# tb_sheet.range('c:c').api.ColumnWidth = 44.89
# tb_sheet.range('d:e').api.ColumnWidth = 13.71
# tb_sheet.range('a1:g1').api.Font.Bold = True
# tb_sheet.range('d2:e11').number_format = '0.00%'

# chars_sheet = wb.sheets['Chars']
# chars_sheet.range('a:c').api.ColumnWidth = 14.00
# chars_sheet.range('d:d').api.ColumnWidth = 31.71
# chars_sheet.range('e:f').api.ColumnWidth = 14.29
# chars_sheet.range('a1:g1').api.Font.Bold = True

# mcap_sheet = wb.sheets['MCap']
# mcap_sheet.range('a:c').api.ColumnWidth = 15.86
# mcap_sheet.range('d:d').api.ColumnWidth = 8.43
# mcap_sheet.range('e:g').api.ColumnWidth = 15.86
# mcap_sheet.range('a1:g1').api.Font.Bold = True
# mcap_sheet.range('d1:d6').api.HorizontalAlignment = constants.HAlign.xlHAlignCenter
# mcap_sheet.range('f2:g7').number_format = '0.00%'

# # Put rpt_date in proper string format
# month_dict = {'1': 'Jan', '2': 'Feb', '3': 'Mar', '4': 'Apr', '5': 'May', '6': 'Jun', '7': 'Jul', '8': 'Aug', '9': 'Sep', '10': 'Oct', '11': 'Nov', '12': 'Dec'}

# def extract_date(date_string):
#     month = date_string.split('/')[0]
#     month_str = month_dict[month]
#     year = date_string.split('/')[2][2:]
#     return f"{month_str}{year}"
    
# date_use = extract_date(rpt_date)

# dir_path = r'C:/Users/eardito/Python Files/INTECH Projects/ETL_ffChars/output/'
# file_name = f'{rpt_code}_{date_use}.xlsx'
# full_path = dir_path + file_name 
# wb.save(full_path)
# wb.close()
# del app

In [90]:
# Display the path of the workbook most recently saved by the export cell.
# NOTE(review): the stored output for this cell appears to come from an earlier
# run (different strategy code and file name) - re-run to refresh it.
full_path

'C:/Users/eardito/Python Files/INTECH Projects/ETL_ffChars/output/I5168_Jun22.xlsx'