## Use this script to validate individual organizations for data QC purposes

In [None]:
# importing libraries
import pandas as pd
import os
from datetime import datetime
import shutil
import glob
from pathlib import Path

In [None]:

# Build the path to the folder that holds the Addepar query exports.
# The OneDrive root is read from the 'onedrive' environment variable; when it is not set, the default local sync folder is used.
onedrive_base = Path(os.environ.get('onedrive', 'C:/Users/azona/OneDrive - Cerity Partners'))
process_base = onedrive_base / 'Portfolio Management Team' / '2 - Monthly AUM and Performance' / 'DEV' / '2.1.1 - DEV Addepar Queries'

# process_base already starts at onedrive_base, so it is the input folder as-is.
# Joining onedrive_base in front of it again would duplicate the leading folders whenever the root is a relative path.
input_path = process_base

# Setting the working directory to the input folder, then importing the file by name from there
os.chdir(input_path)
input_file_name = 'Holdings Cleveland.xlsx'
data = pd.read_excel(input_file_name)

In [None]:
# "SMA ID" values whose rows should not be rolled up
values_no_roll_up = [
    'Unassigned',
    'Roll up - Non SMA',
]

# Cumulative count of rows within each "SMA ID": cumcount starts at 0, so 1 is added to make the first row of each ID equal to 1
position_within_sma_id = data.groupby('SMA ID').cumcount()
data['row_count'] = position_within_sma_id + 1

In [None]:
# Flag the rows whose "SMA ID" is in the "no roll up" list, and the rows that are the first row of their "SMA ID"
# (the first row is the top of the roll up in the Excel doc)
is_no_roll_up = data['SMA ID'].isin(values_no_roll_up)
is_first_row = data['row_count'] == 1

# Rolled-up IDs keep only their first row; "no roll up" IDs drop the first row and keep the rest
keep_row = (~is_no_roll_up & is_first_row) | (is_no_roll_up & ~is_first_row)
data = data[keep_row]

# Dropping the total row at the bottom
data = data.loc[data['CP SMA ID'].ne('Total')]

In [None]:
# Stripping the '.xlsx' file extension text from the file name
clean_file_name = input_file_name.replace('.xlsx', '')

# The organization name is whatever follows the last 'Holdings ' in the cleaned file name
# (the whole cleaned name is used when 'Holdings ' is not present)
organization_name = clean_file_name.rpartition('Holdings ')[2]

# Storing the organization name, prefixed with '*', on every row of the dataset
data['Organization'] = f'*{organization_name}'

In [None]:
# Helper columns that are not needed in the final output
columns_2_drop = ['SMA ID', 'row_count']
data = data.drop(columns_2_drop, axis='columns')

# Keeping the output columns, in the sequence the output is to be seen
columns_sequence = [
    'Organization',
    'CP SMA ID',
    'Adjusted Value (No Div, USD)',
    'Adjusted Net Cash Flow (YTD, No Div, USD)',
]
data = data.loc[:, columns_sequence]

In [None]:
# Display the finished table; use it to check the row count against the other results in the batch file if you would like
data

In [None]:
# Establishing the output location
# The 'Outputs' folder sits inside the input folder (process_base), so it is built from that path and follows the same
# OneDrive root as the input, instead of repeating a user-specific absolute path
output_path = process_base / 'Outputs'
output_file = os.path.join(output_path, "testing.xlsx")

# Writing the final table to a single sheet, without the DataFrame index
data.to_excel(output_file, sheet_name='sheet1', index=False)