## Negative Selection data preparation and file creation

In [3]:
import numpy as np
import pandas as pd
import glob
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore

In [6]:
sns_files = pd.read_csv(r'C:\Users\B420615\OneDrive - Standard Bank\Py\SNS_Control.csv')

In [7]:
sns_files = sns_files['Files'].tolist()

In [8]:
path = glob.glob(r'C:\Users\B420615.STANLIB\Neg_Sel\*.csv')

#create two lists of desired column names - one for all columns and one for data columns
adjcols = ['Date','Ticker','Name','BM','AdjfcfY','CECP','TR_1M','Price','FCFY','MCap','TR_12-1','best_eps_chg','rec_chg12M','tp_chg','fcf_me','fcf_at','seas_1_1an','rec_chg']
data_cols = ['BM','AdjfcfY','CECP','TR_1M','Price','FCFY','MCap','TR_12-1','best_eps_chg','rec_chg12M','tp_chg','fcf_me','fcf_at','seas_1_1an','rec_chg']
cols_with_commas = ['Price','MCap']
cols_to_avg = ['BM','AdjfcfY','TR_1M','FCFY','TR_12-1','best_eps_chg','rec_chg12M','tp_chg','fcf_me','fcf_at','seas_1_1an']
cols_to_zero = ['CECP']
excl_cols = ['rec_chg']

In [11]:
for file in sns_files[:]:
    try:
        sentiment_df = pd.read_csv(rf'C:\Users\B420615\OneDrive - Standard Bank\Neg_Sel\{file}.csv', thousands=',')
        # clean data so that all values, apart from naming values, and including NaNs, are reflected as float values. Delete unneeded column
        sentiment_df = sentiment_df[Neg_Sel_df['Ticker'].notna()]
        sentiment_df.replace('N.A.', np.nan, inplace=True)
        sentiment_df.drop(['EPS_ESTIMATE_CHG'], axis=1, inplace=True)
        
        column_mapping = {
            'Date': 'Date',
            'Ticker': 'Ticker',
            'Short Name': 'Name',
            'BENEISH-M': 'BM',
            'AdjFCFYield': 'AdjfcfY',
            'ChgEstimateVSChgPrice': 'CECP',
            'Total Return:M-1': 'TR_1M',
            'FCF Yld:Y': 'FCFY',
            'TR_Momentum': 'TR_12-1',
            'best_eps_chg': 'best_eps_chg',
            'rec_chg_12M': 'rec_chg12M',
            'tp_change': 'tp_chg',
            'fcf_me': 'fcf_me',
            'fcf_at': 'fcf_at',
            'seas_1_1an': 'seas_1_1an',
            'rec_chg': 'rec_chg',
            'Market Cap': 'MCap',
            'Last Px': 'Price'
        }
        
        # Rename columns
        sentiment_df = sentiment_df.rename(columns=column_mapping)
        
        def convert_to_float(value):
            if isinstance(value, str):
                value = value.replace(',', '')
                try:
                    return float(value)
                except ValueError:
                    return None
            return value
        
        sentiment_df[data_cols] = sentiment_df[data_cols].apply(lambda col: col.map(convert_to_float))
        
        for col in cols_to_avg:
            if col in sentiment_df.columns:
                mean_value = Neg_Sel_df[col].astype(float).mean()
                Neg_Sel_df[col] = Neg_Sel_df[col].fillna(mean_value)
        
        # FIXED: Calculate z-scores for columns EXCLUDING seas_1_1an and rec_chg
        # These will be calculated separately after dropping NAs
        cols_for_first_zscore = [col for col in Neg_Sel_df.columns[3:-2] if col not in ['seas_1_1an', 'rec_chg']]
        z_scores = Neg_Sel_df[cols_for_first_zscore].apply(zscore)
        z_scores.columns = [f'z_{col}' for col in cols_for_first_zscore]
        Neg_Sel_df = pd.concat([Neg_Sel_df, z_scores], axis=1)
        
        excl_stocks = []
        for index, row in Neg_Sel_df.iterrows():
            if pd.isna(row['seas_1_1an']) or pd.isna(row['rec_chg']):
                excl_stocks.append(row['Ticker'])
        
        # Drop rows with NAs in seas_1_1an or rec_chg
        sentiment_df = sentiment_df.dropna(subset=['seas_1_1an', 'rec_chg'])
        
        # NOW calculate z scores for seas_1_1an and rec_chg (after dropping NAs)
        z_scores = sentiment_df[excl_cols].apply(zscore)
        z_scores.columns = ['z_seas_1_1an', 'z_rec_chg']
        sentiment_df = pd.concat([sentiment_df, z_scores], axis=1)
        
        # Calculate z-scores using specified weights
        sentiment_df['Sentiment'] = (sentiment_df['z_CECP']*0.2) + (sentiment_df['z_best_eps_chg']*0.2) + (sentiment_df['z_tp_chg']*0.2) + (sentiment_df['z_rec_chg']*0.2) + (sentiment_df['z_seas_1_1an']*0.1) + (sentiment_df['z_TR_1M']*-0.1)
        
        # Divide universe into quintiles
        sentiment_df['S_Quintile'] = pd.qcut(sentiment_df['Sentiment'], q=5, labels=False) + 1
        
        # Save processed sentiment csv file
        output_file_path = rf'C:\Users\B420615\OneDrive - Standard Bank\Neg_Sel\sentimentl_S{file}Mod.csv'
        sentiment_df.to_csv(output_file_path, index=False)
        
    except Exception as e:
        print(f"Error processing file {file}: {e}")

  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)
  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)
  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)
  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)
  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)
  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)


Error processing file NS20041231: name 'sentiment_df' is not defined
Error processing file NS20050131: name 'sentiment_df' is not defined
Error processing file NS20050228: name 'sentiment_df' is not defined
Error processing file NS20050331: name 'sentiment_df' is not defined
Error processing file NS20050430: name 'sentiment_df' is not defined
Error processing file NS20050531: name 'sentiment_df' is not defined


  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)
  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)
  results[i] = self.func(v, *self.args, **self.kwargs)
  Neg_Sel_df.replace('N.A.', np.nan, inplace=True)


Error processing file NS20050630: name 'sentiment_df' is not defined
Error processing file NS20050731: name 'sentiment_df' is not defined
Error processing file NS20050831: name 'sentiment_df' is not defined
Error processing file NS20050930: name 'sentiment_df' is not defined
Error processing file NS20051031: name 'sentiment_df' is not defined
Error processing file NS20051130: name 'sentiment_df' is not defined
Error processing file NS20051231: name 'sentiment_df' is not defined
Error processing file NS20060131: name 'sentiment_df' is not defined
Error processing file NS20060228: name 'sentiment_df' is not defined
Error processing file NS20060331: name 'sentiment_df' is not defined
Error processing file NS20060430: name 'sentiment_df' is not defined
Error processing file NS20060531: name 'sentiment_df' is not defined
Error processing file NS20060630: name 'sentiment_df' is not defined
Error processing file NS20060731: name 'sentiment_df' is not defined
Error processing file NS20060831: 

In [10]:
# Create new analyst sentiment factor and test output

sentiment_df['an_sent'] = ((sentiment_df['z_best_eps_chg']/3) + (sentiment_df['z_tp_chg']/3) + (sentiment_df['z_rec_chg']/3))

NameError: name 'sentiment_df' is not defined