In [2]:
import pandas as pd
from datetime import timedelta
import os
import numpy as np
from tqdm import tqdm

In [3]:
# Load the four input tables (paths are relative to the notebook's working directory).
trading_history = pd.read_csv('dropped_files/trading_history_data.csv')
overview = pd.read_csv('dropped_files/overview_page_data.csv')
er = pd.read_csv('dropped_files/exchangerate.csv')
compute = pd.read_csv('dropped_files/compute1.csv')

# Parse the timestamp columns with plain column assignment.
# `df.loc[:, col] = ...` is deliberately avoided: since pandas 2.0 it writes
# into the existing column in place, so a column read as object/str would keep
# that dtype instead of becoming datetime64.
trading_history['date_open'] = pd.to_datetime(trading_history['date_open'])
trading_history['date_closed'] = pd.to_datetime(trading_history['date_closed'])

overview['scraped_on'] = pd.to_datetime(overview['scraped_on'])

er['time'] = pd.to_datetime(er['time'])

compute['open_date'] = pd.to_datetime(compute['open_date'])
compute['close_date'] = pd.to_datetime(compute['close_date'])
compute['open_date2'] = pd.to_datetime(compute['open_date2'])

In [4]:
# Drop rows that are duplicated across every column (the first occurrence is kept).
trading_history = trading_history.drop_duplicates()

In [5]:
# Preview the first five rows of the trade table.
trading_history.head()

Unnamed: 0,user_id,trade_id,lots,trade_type,date_open,date_closed,price_open,price_closed,currency
0,24632,514812394,1.0,BUY,2019-10-14 20:11:11,2019-11-17 17:23:15,108.673,109.78,CHF/JPY
1,388129,517500344,0.01,BUY,2019-11-15 16:00:00,2019-11-17 17:01:01,1.89337,1.89552,GBP/AUD
2,373754,517456832,0.2,SELL,2019-11-15 06:36:31,2019-11-15 16:57:49,1.89611,1.89197,GBP/AUD
3,300553,517256771,1.0,BUY,2019-11-13 12:35:06,2019-11-15 16:02:09,0.67633,0.67429,AUD/CHF
4,300553,517365782,1.0,BUY,2019-11-14 09:16:01,2019-11-15 16:02:05,0.67028,0.67431,AUD/CHF


In [7]:
# Display the exchange-rate table (pandas abbreviates long frames when rendering).
er

Unnamed: 0,time,high,low,close,unit
0,2013-05-16 00:00:00,0.95671,0.95534,0.95647,AUDCHF
1,2013-05-16 01:00:00,0.95663,0.95537,0.95576,AUDCHF
2,2013-05-16 02:00:00,0.95645,0.95538,0.95550,AUDCHF
3,2013-05-16 03:00:00,0.95577,0.95392,0.95398,AUDCHF
4,2013-05-16 04:00:00,0.95410,0.95331,0.95404,AUDCHF
...,...,...,...,...,...
1114662,2021-05-26 10:00:00,0.93969,0.93888,0.93933,AUDCAD
1114663,2021-05-26 11:00:00,0.93978,0.93918,0.93935,AUDCAD
1114664,2021-05-26 12:00:00,0.94087,0.93911,0.94001,AUDCAD
1114665,2021-05-26 13:00:00,0.94001,0.93816,0.93823,AUDCAD


In [4]:
# Functions

def create_user_scrapes_dict(overview_df):
    """Group overview-page scrapes by user.

    Parameters
    ----------
    overview_df : pandas.DataFrame
        Must provide the columns ``userid``, ``scraped_on``,
        ``viewed_no_of_times``, ``weeks``, ``profit_in_pips`` and ``trades``.

    Returns
    -------
    dict
        Maps each ``userid`` to a list of
        ``(scraped_on, views, weeks, profit_in_pips, trades)`` tuples, ordered
        by ascending ``scraped_on``.
    """
    snapshot_fields = ['scraped_on', 'viewed_no_of_times', 'weeks', 'profit_in_pips', 'trades']

    scrapes_by_user = {}
    for _, record in overview_df.iterrows():
        snapshot = tuple(record[field] for field in snapshot_fields)
        scrapes_by_user.setdefault(record['userid'], []).append(snapshot)

    # Chronological order per user; list.sort is stable, so equal timestamps
    # keep their input order.
    for snapshots in scrapes_by_user.values():
        snapshots.sort(key=lambda snapshot: snapshot[0])

    return scrapes_by_user


def create_derived_variables(trading_history_df, user_scrapes):
    """Expand each trade into one row per scrape taken while the trade was open.

    Parameters
    ----------
    trading_history_df : pandas.DataFrame
        Must provide ``user_id``, ``trade_id``, ``date_open``, ``date_closed``,
        ``currency`` and ``lots``.
    user_scrapes : dict
        ``user_id -> [(scrape_date, views, weeks, profit_in_pips, trades), ...]``
        as produced by ``create_user_scrapes_dict``.

    Returns
    -------
    pandas.DataFrame
        One row per scrape with ``date_open <= scrape_date <= date_closed``,
        numbered from 1 in ``observation_number``. When the last such scrape is
        earlier than ``date_closed`` an extra row dated ``date_closed`` is
        added that repeats the last scrape's values. ``trade_no`` is the
        running per-user count of trades in input order. Trades without any
        scrape in their window contribute no rows.
    """

    def build_record(trade_fields, trade_no, obs_num, observed_at, views, views_before,
                     weeks, pips, n_trades):
        # The key order here fixes the column order of the returned frame.
        uid, tid, opened, closed, pair, size = trade_fields
        return {
            'user_id': uid,
            'trade_id': tid,
            'trade_no': trade_no,
            'observation_number': obs_num,
            'date_open': opened,
            'date_closed': closed,
            'date_observed': observed_at,
            'views': views,
            'views_previous': views_before,
            'currency': pair,
            'lots': size,
            'cum_day': weeks * 7,  # weeks expressed in days
            'cum_profit_in_pips': pips,
            'cum_trades': n_trades,
        }

    records = []
    trades_per_user = {}

    for _, trade in trading_history_df.iterrows():
        uid = trade['user_id']
        tid = trade['trade_id']
        opened = trade['date_open']
        closed = trade['date_closed']
        pair = trade['currency']
        size = trade['lots']
        trade_fields = (uid, tid, opened, closed, pair, size)

        # Running 1-based trade index per user; counted even when the trade
        # ends up contributing no rows.
        trade_no = trades_per_user.get(uid, 0) + 1
        trades_per_user[uid] = trade_no

        if uid not in user_scrapes:
            continue

        in_window = [
            (seen_at, views, weeks, pips, n_trades)
            for seen_at, views, weeks, pips, n_trades in user_scrapes[uid]
            if opened <= seen_at <= closed
        ]
        if not in_window:
            continue

        views_before = None
        for obs_num, (seen_at, views, weeks, pips, n_trades) in enumerate(in_window, start=1):
            records.append(
                build_record(trade_fields, trade_no, obs_num, seen_at, views, views_before,
                             weeks, pips, n_trades)
            )
            views_before = views

        # Close the series with a row at date_closed carrying the last known values.
        seen_at, views, weeks, pips, n_trades = in_window[-1]
        if seen_at < closed:
            records.append(
                build_record(trade_fields, trade_no, len(in_window) + 1, closed, views, views,
                             weeks, pips, n_trades)
            )

    return pd.DataFrame(records)


def get_lowest_rate(currency, start_date, end_date):
    """Return the smallest rate stored for ``currency`` between two timestamps.

    Looks the currency up in the module-level ``exchange_rates_dict`` and
    considers every entry whose timestamp lies in ``[start_date, end_date]``
    (both ends inclusive). Returns ``None`` when the currency is unknown or no
    entry falls inside the window.
    """
    if currency not in exchange_rates_dict:
        return None

    in_window = [
        value
        for stamp, value in exchange_rates_dict[currency].items()
        if start_date <= stamp <= end_date
    ]
    if not in_window:
        return None
    return min(in_window)

def get_24hr_avg_rate(currency, end_date):
    """Return the mean rate for ``currency`` over the 24 hours before ``end_date``.

    Uses the module-level ``exchange_rates_dict``. The window is half-open:
    ``end_date - 24h <= timestamp < end_date``. Returns ``None`` when the
    currency is unknown or the window contains no entries.
    """
    if currency not in exchange_rates_dict:
        return None

    window_start = end_date - timedelta(hours=24)
    in_window = [
        value
        for stamp, value in exchange_rates_dict[currency].items()
        if window_start <= stamp < end_date
    ]
    if not in_window:
        return None
    return sum(in_window) / len(in_window)

def get_avg_rate(currency, start_date, end_date):
    """Return the mean rate for ``currency`` between two timestamps.

    Uses the module-level ``exchange_rates_dict`` and averages every entry
    whose timestamp lies in ``[start_date, end_date]`` (both ends inclusive).
    Returns ``None`` when the currency is unknown or the window is empty.
    """
    if currency not in exchange_rates_dict:
        return None

    in_window = [
        value
        for stamp, value in exchange_rates_dict[currency].items()
        if start_date <= stamp <= end_date
    ]
    if not in_window:
        return None
    return sum(in_window) / len(in_window)

def calculate_rates(row):
    """Derive exchange-rate features for one observation row.

    Reads the module-level ``exchange_rates_dict``. ``row`` must provide
    ``currency`` (with a ``/`` separator, which is removed for the lookup) and
    the timestamps ``date_open``, ``date_observed`` and ``date_closed``; all
    three are floored to the hour before use.

    Returns
    -------
    pandas.Series
        ``neg_rate_diff``
            Rate at the close hour minus the lowest rate between the open and
            observation hours; ``None`` when either value is unavailable.
        ``24hr_avg_rate``
            Mean rate over the 24 hours before the close hour.
        ``fx_rate_idx``
            Mean rate between the open and observation hours.
        All three are ``None`` when the currency is not in
        ``exchange_rates_dict``.
    """
    currency = row['currency'].replace('/', '')
    date_open = row['date_open'].floor('h')
    date_observed = row['date_observed'].floor('h')
    date_closed = row['date_closed'].floor('h')

    if currency not in exchange_rates_dict:
        return pd.Series({'neg_rate_diff': None, '24hr_avg_rate': None, 'fx_rate_idx': None})

    lowest_rate = get_lowest_rate(currency, date_open, date_observed)
    close_rate = exchange_rates_dict[currency].get(date_closed)
    avg_24hr = get_24hr_avg_rate(currency, date_closed)
    fx_rate_idx = get_avg_rate(currency, date_open, date_observed)

    # Test for "missing" explicitly: a truthiness check would also discard a
    # legitimate value of 0.0.
    if lowest_rate is not None and close_rate is not None:
        neg_rate_diff = close_rate - lowest_rate
    else:
        neg_rate_diff = None

    return pd.Series({
        'neg_rate_diff': neg_rate_diff,
        '24hr_avg_rate': avg_24hr,
        'fx_rate_idx': fx_rate_idx
    })

# Keep the existing gain calculation
def calculate_gain(row_curr):
    """Flag whether the rate at observation is above the rate at opening.

    Looks up the hourly rates for the row's currency (``/`` removed) in the
    module-level ``exchange_rates_dict`` at ``date_open`` and
    ``date_observed``, both floored to the hour.

    Returns ``1`` when the observed rate is strictly greater than the opening
    rate, ``0`` otherwise, and ``None`` when the currency or either hourly
    timestamp is missing from the lookup.

    NOTE(review): the comparison does not look at the trade direction
    (BUY/SELL) - confirm that this is intended.
    """
    pair = row_curr['currency'].replace('/', '')
    opened_hour = row_curr['date_open'].floor('h')
    observed_hour = row_curr['date_observed'].floor('h')

    if pair not in exchange_rates_dict:
        return None

    hourly_rates = exchange_rates_dict[pair]
    if opened_hour not in hourly_rates or observed_hour not in hourly_rates:
        return None

    rate_at_open = hourly_rates[opened_hour]
    rate_at_observation = hourly_rates[observed_hour]
    if rate_at_observation > rate_at_open:
        return 1
    return 0

def get_rank(user_id, date):
    """Return the rank stored in the module-level ``rank_dict`` for ``user_id``
    on ``date``, or ``None`` when the user or the date is not present."""
    if user_id not in rank_dict:
        return None
    return rank_dict[user_id].get(date)

def get_average_rank(user_id, end_date):
    """Return the mean rank of ``user_id`` over the 30 days ending at ``end_date``.

    Reads the module-level ``rank_dict`` and averages every rank whose date
    lies in ``[end_date - 30 days, end_date]`` (both ends inclusive). Returns
    ``None`` when the user is unknown or has no rank in that window.
    """
    window_start = end_date - timedelta(days=30)

    total = 0
    count = 0
    for day, rank in rank_dict.get(user_id, {}).items():
        if window_start <= day <= end_date:
            total += rank
            count += 1

    if count == 0:
        return None
    return total / count

def calculate_rank_and_change(row):
    """Compute rank features for one observation row.

    ``row`` must provide ``user_id``, ``date_observed`` and ``date_open``; both
    timestamps are floored to the day before the lookups.

    Returns
    -------
    pandas.Series
        ``rank``         rank on the observation day;
        ``rank_change``  rank on the opening day minus rank on the observation day;
        ``avg_rank_1m``  result of ``get_average_rank`` for the observation day.
        All three are ``None`` when the rank is missing for either day.
    """
    labels = ['rank', 'rank_change', 'avg_rank_1m']

    uid = row['user_id']
    observed_day = row['date_observed'].floor('D')
    open_day = row['date_open'].floor('D')

    rank_now = get_rank(uid, observed_day)
    rank_at_open = get_rank(uid, open_day)
    mean_rank_30d = get_average_rank(uid, observed_day)

    if rank_now is None or rank_at_open is None:
        values = {'rank': None, 'rank_change': None, 'avg_rank_1m': None}
    else:
        values = {
            'rank': rank_now,
            'rank_change': rank_at_open - rank_now,
            'avg_rank_1m': mean_rank_30d,
        }
    return pd.Series(values, index=labels)


def count_concurrent_trades_and_calculate_dispos_effect(row, trading_history_df, exchange_rates_dict):
    """Count a user's concurrent trades at an observation time and derive the
    disposition-effect measure from them.

    Parameters
    ----------
    row : mapping
        Must provide ``user_id`` and ``date_observed`` (anything
        ``pd.to_datetime`` accepts).
    trading_history_df : pandas.DataFrame
        Trades with ``user_id``, ``date_open``, ``date_closed`` and
        ``currency``. If a date column is not yet datetime64 it is parsed and
        written back to this frame, so the conversion happens at most once per
        frame rather than on every call.
    exchange_rates_dict : dict
        ``currency (no slash) -> {hourly timestamp -> rate}``.

    Returns
    -------
    pandas.Series
        ``conc_transactions``  trades of the user open at ``date_observed``
                               (``date_open <= date_observed <= date_closed``);
        ``conc_currencies``    number of distinct currencies among them;
        ``n_open_gain`` / ``n_open_loss``      gains/losses among trades closing
                               on a later calendar day;
        ``n_closed_gain`` / ``n_closed_loss``  gains/losses among trades closing
                               on the observation's calendar day;
        ``prop_closed_gains``  closed gains / (closed + open gains), 0 if none;
        ``prop_closed_loss``   closed losses / (closed + open losses), 0 if none;
        ``dispos_effect``      ``prop_closed_gains - prop_closed_loss``.

    A trade counts as a gain when the hourly rate at observation is above the
    hourly rate at opening, and as a loss when it is below; trades with a
    missing rate or an unchanged rate count as neither.

    NOTE(review): gain/loss ignores the trade direction (BUY/SELL) - confirm
    that this is intended.
    """
    user_id = row['user_id']
    date_observed = pd.to_datetime(row['date_observed'])
    observed_hour = date_observed.floor('h')
    observed_day = date_observed.date()

    # Parse the date columns only when needed. This function is applied once
    # per observation row, and re-parsing already-parsed columns every time is
    # pure overhead.
    for column in ('date_open', 'date_closed'):
        if not pd.api.types.is_datetime64_any_dtype(trading_history_df[column]):
            trading_history_df[column] = pd.to_datetime(trading_history_df[column])

    # Trades of this user that are open at the observation time
    relevant_trades = trading_history_df[
        (trading_history_df['user_id'] == user_id) &
        (trading_history_df['date_open'] <= date_observed) &
        (trading_history_df['date_closed'] >= date_observed)
    ]

    concurrent_count = len(relevant_trades)
    unique_currencies = relevant_trades['currency'].nunique()

    # Split by calendar day of closing: later day -> still open, same day -> closed
    closing_days = relevant_trades['date_closed'].dt.date
    open_trades = relevant_trades[closing_days > observed_day]
    closed_trades = relevant_trades[closing_days == observed_day]

    def calculate_gain_loss(trades):
        """Count trades whose rate rose (gain) or fell (loss) since opening."""
        no_gain, no_loss = 0, 0
        for _, trade in trades.iterrows():
            currency = trade['currency'].replace('/', '')
            if currency not in exchange_rates_dict:
                continue

            open_rate = exchange_rates_dict[currency].get(trade['date_open'].floor('h'))
            observed_rate = exchange_rates_dict[currency].get(observed_hour)
            if open_rate is None or observed_rate is None:
                continue

            if observed_rate > open_rate:
                no_gain += 1
            elif observed_rate < open_rate:
                no_loss += 1

        return no_gain, no_loss

    no_open_gain, no_open_loss = calculate_gain_loss(open_trades)
    no_closed_gain, no_closed_loss = calculate_gain_loss(closed_trades)

    # Proportion of gains (losses) that were realised on the observation day
    total_gains = no_closed_gain + no_open_gain
    total_losses = no_closed_loss + no_open_loss

    prop_closed_gains = no_closed_gain / total_gains if total_gains > 0 else 0
    prop_closed_loss = no_closed_loss / total_losses if total_losses > 0 else 0

    dispos_effect = prop_closed_gains - prop_closed_loss

    return pd.Series({
        'conc_transactions': concurrent_count,
        'conc_currencies': unique_currencies,
        'n_open_gain': no_open_gain,
        'n_open_loss': no_open_loss,
        'n_closed_gain': no_closed_gain,
        'n_closed_loss': no_closed_loss,
        'prop_closed_gains': prop_closed_gains,
        'prop_closed_loss': prop_closed_loss,
        'dispos_effect': dispos_effect
    })


def add_compute_columns(row, compute_df):
    """Fetch follower-related columns for one observation row from ``compute_df``.

    A row of ``compute_df`` matches when its ``user_id`` and ``trade_id`` equal
    those of ``row`` and its ``open_date2`` equals ``row['date_observed']``
    floored to the day. Values are taken from the first matching row.

    Returns
    -------
    pandas.Series
        ``followers``, ``amount_following``, ``total_follower_profit``,
        ``gain_days`` (from ``cum_gain_days``) and ``avg_slippage``; all
        ``None`` when nothing matches.
    """
    # output label -> source column in compute_df (order defines the Series index)
    output_to_source = {
        'followers': 'followers',
        'amount_following': 'amount_following',
        'total_follower_profit': 'total_follower_profit',
        'gain_days': 'cum_gain_days',
        'avg_slippage': 'avg_slippage',
    }

    uid = row['user_id']
    tid = row['trade_id']
    observed_day = row['date_observed'].floor('D')

    is_match = (
        (compute_df['user_id'] == uid)
        & (compute_df['trade_id'] == tid)
        & (compute_df['open_date2'] == observed_day)
    )
    hits = compute_df[is_match]

    if hits.empty:
        return pd.Series({label: None for label in output_to_source})

    return pd.Series({
        label: hits[source].values[0]
        for label, source in output_to_source.items()
    })


def find_cohort(user_id, trade_id, date_open, compute_df):
    """Find the users ranked closest to ``user_id`` around the trade's opening day.

    Starting at ``date_open`` floored to the day, up to four consecutive days
    are tried. On the first day for which ``compute_df`` has a row for the user
    (matched on ``user_id`` and ``open_date2``), all rows of that day are
    ordered by ``zulu_rank`` and the cohort is built from

    * the last six rows with ``zulu_rank`` <= the user's rank (this includes
      the user), followed by
    * the first five rows with ``zulu_rank`` > the user's rank.

    Progress is printed along the way. ``trade_id`` is only used in the first
    message.

    Returns
    -------
    list or None
        The ``user_id`` values of the cohort, or ``None`` when the user has no
        row on any of the four days.
    """
    print(f"Finding cohort for user_id: {user_id}, trade_id: {trade_id}, date_open: {date_open}")

    first_day = date_open.floor('D')
    for offset in range(4):  # the opening day plus the three following days
        day = first_day + timedelta(days=offset)
        on_day = compute_df['open_date2'] == day
        own_rows = compute_df[(compute_df['user_id'] == user_id) & on_day]
        found = not own_rows.empty

        print(f"  Checking date: {day}, user data found: {found}")

        if not found:
            continue

        own_rank = own_rows['zulu_rank'].values[0]
        ranked = compute_df[on_day].sort_values('zulu_rank')
        print(f"  Min rank: {ranked['zulu_rank'].min()}, Max rank: {ranked['zulu_rank'].max()}")
        print(f"  User rank: {own_rank}, Total ranks for this date: {len(ranked)}")

        # Neighbours on both sides of the user's rank
        at_or_below = ranked[ranked['zulu_rank'] <= own_rank].tail(6)  # includes the user
        above = ranked[ranked['zulu_rank'] > own_rank].head(5)

        members = pd.concat([at_or_below, above])['user_id'].tolist()

        print(f"  Lower ranks: {at_or_below['zulu_rank'].tolist()}")
        print(f"  Higher ranks: {above['zulu_rank'].tolist()}")
        print(f"  Cohort found: {members}")
        print(f"  Cohort size: {len(members)}")

        return members

    print("  Cohort not found after checking 4 days")
    return None

def calculate_cohort_min_rank(row, cohort_dict, compute_df):
    """Return the smallest ``zulu_rank`` within the trade's cohort on the observation day.

    Parameters
    ----------
    row : mapping
        Must provide ``user_id``, ``trade_id``, ``date_open`` and
        ``date_observed``; both timestamps are floored to the day.
    cohort_dict : dict
        Cache keyed by ``(user_id, trade_id)``. A missing key is filled in
        place with the result of ``find_cohort`` for the opening day.
    compute_df : pandas.DataFrame
        Must provide ``user_id``, ``open_date2`` and ``zulu_rank``.

    Returns
    -------
    The minimum ``zulu_rank`` among cohort members that have a row on the
    observation day, or ``None`` when the cohort is ``None`` or no member has
    a row on that day.
    """
    key = (row['user_id'], row['trade_id'])
    open_day = row['date_open'].floor('D')
    observed_day = row['date_observed'].floor('D')

    # The cohort is fixed per (user, trade), so it is looked up only once.
    if key not in cohort_dict:
        cohort_dict[key] = find_cohort(key[0], key[1], open_day, compute_df)

    members = cohort_dict[key]
    if members is None:
        return None

    on_day = compute_df['open_date2'] == observed_day
    peers = compute_df[on_day & compute_df['user_id'].isin(members)]
    if peers.empty:
        return None
    return peers['zulu_rank'].min()

In [5]:
# Dicts

# exchange_rates_dict: currency unit (e.g. 'AUDCHF') -> {timestamp -> close price}.
# The three columns are walked in lockstep instead of using DataFrame.iterrows(),
# which builds a Series for every one of the ~1.1M rate rows.
exchange_rates_dict = {}
for currency, date, close_price in zip(er['unit'], er['time'], er['close']):
    exchange_rates_dict.setdefault(currency, {})[date] = close_price

# rank_dict: user_id -> {open_date2 -> zulu_rank}.
# A later row for the same (user, date) overwrites an earlier one.
rank_dict = {}
for user_id, date, zulu_rank in zip(compute['user_id'], compute['open_date2'], compute['zulu_rank']):
    rank_dict.setdefault(user_id, {})[date] = zulu_rank

In [6]:
def final_derived_dataframe(trading_func, overview_func, er_dict_func, compute_func):
    """Build the full per-observation table of derived variables.

    Each trade in ``trading_func`` is expanded into one row per overview scrape
    taken while it was open (see ``create_derived_variables``) and then
    enriched with exchange-rate, rank, concurrency/disposition, follower,
    GARCH and cohort columns.

    Parameters
    ----------
    trading_func : pandas.DataFrame
        Trade history; passed to ``create_derived_variables`` and
        ``count_concurrent_trades_and_calculate_dispos_effect``.
    overview_func : pandas.DataFrame
        Overview-page scrapes; passed to ``create_user_scrapes_dict``.
    er_dict_func : dict
        Exchange-rate lookup handed to
        ``count_concurrent_trades_and_calculate_dispos_effect``. Note that
        ``calculate_rates`` and ``calculate_gain`` read the module-level
        ``exchange_rates_dict`` rather than this argument.
    compute_func : pandas.DataFrame
        Table used for the follower columns and the cohort ranks. Its
        ``open_date2`` column is overwritten in place with parsed datetimes.

    Returns
    -------
    pandas.DataFrame
        The 34 columns listed near the end of the function, in that order,
        with missing values replaced by the string ``'.'``.

    Notes
    -----
    Reads every file in the ``garsh-files`` directory as CSV, and prints
    progress messages through ``find_cohort``.
    """
    overview_func_scrape_dict = create_user_scrapes_dict(overview_func)
    derived_variables = create_derived_variables(trading_func, overview_func_scrape_dict)

    # Seconds between opening and observation
    derived_variables['Tij'] = (derived_variables['date_observed'] - derived_variables['date_open']).dt.total_seconds()

    derived_variables[['neg_rate_diff', '24hr_avg_rate', 'fx_rate_idx']] = derived_variables.apply(calculate_rates, axis=1)

    derived_variables['gain'] = derived_variables.apply(calculate_gain, axis=1)

    derived_variables[['rank', 'rank_change', 'avg_rank_1m']] = derived_variables.apply(calculate_rank_and_change, axis=1)

    # The helper returns nine statistics, but only three are kept here. They are
    # selected by name: assigning the full nine-column result to three target
    # columns raises "Columns must be same length as key".
    dispos_columns = ['conc_transactions', 'conc_currencies', 'dispos_effect']
    dispos_stats = derived_variables.apply(lambda row: count_concurrent_trades_and_calculate_dispos_effect(row, trading_func, er_dict_func), axis=1)
    derived_variables[dispos_columns] = dispos_stats[dispos_columns]

    new_columns = ['followers', 'amount_following', 'total_follower_profit', 'gain_days', 'avg_slippage']
    derived_variables[new_columns] = derived_variables.apply(lambda row: add_compute_columns(row, compute_func), axis=1)

    derived_variables = derived_variables.sort_values(['user_id', 'trade_no', 'observation_number'])

    # prev_followers: the followers value of the preceding observation of the
    # same user and trade; stays None on the first observation of each trade.
    derived_variables['prev_followers'] = None

    prev_user = None
    prev_trade = None
    prev_followers = None

    for index, row in derived_variables.iterrows():
        current_user = row['user_id']
        current_trade = row['trade_no']
        current_obs = row['observation_number']

        # If this is a new user or a new trade, reset prev_followers
        if current_user != prev_user or current_trade != prev_trade:
            prev_followers = None
        elif current_obs > 1:  # Not the first observation of the trade
            derived_variables.at[index, 'prev_followers'] = prev_followers

        # Update the previous values for the next iteration
        prev_user = current_user
        prev_trade = current_trade
        prev_followers = row['followers']

    # Load one {date -> log-garch} lookup per file found in the directory.
    directory = 'garsh-files'

    garsh_dict = {}

    for filename in os.listdir(directory):
        # Drops the first six and the last four characters of the file name -
        # presumably a fixed prefix and the '.csv' extension; TODO confirm.
        currency = filename[6:-4]

        df = pd.read_csv(f"{directory}/{filename}")

        df['date'] = pd.to_datetime(df['date'])

        df['log-garch'] = df['log-garch'].astype(float).round(3)

        garsh_dict[currency] = dict(zip(df['date'], df['log-garch']))

    # log_garch is filled only where the currency and the observation day are
    # both present in the lookup.
    for index, row in derived_variables.iterrows():
        currency = row['currency'].replace('/', '')
        date_obs = row['date_observed'].floor('D')

        if currency in garsh_dict:
            if date_obs in garsh_dict[currency]:
                derived_variables.at[index, 'log_garch'] = garsh_dict[currency][date_obs]

    derived_variables['date_open'] = pd.to_datetime(derived_variables['date_open'])
    derived_variables['date_observed'] = pd.to_datetime(derived_variables['date_observed'])
    compute_func['open_date2'] = pd.to_datetime(compute_func['open_date2'])  # mutates the caller's frame

    # Cache of cohorts keyed by (user_id, trade_id), filled by calculate_cohort_min_rank
    cohort_dict = {}

    derived_variables['cohort_min_rank'] = derived_variables.apply(lambda row: calculate_cohort_min_rank(row, cohort_dict, compute_func), axis=1)

    derived_variables = derived_variables.sort_values(['user_id', 'trade_id', 'observation_number'])

    # views_change: views minus the views of the first row of the same user and trade
    derived_variables['views_change'] = 0

    prev_user = None
    prev_trade = None
    first_views = None

    for index, row in derived_variables.iterrows():
        current_user = row['user_id']
        current_trade = row['trade_id']
        current_views = row['views']

        # If this is a new user or a new trade, reset first_views
        if current_user != prev_user or current_trade != prev_trade:
            first_views = current_views

        views_change = current_views - first_views
        derived_variables.at[index, 'views_change'] = views_change

        # Update the previous values for the next iteration
        prev_user = current_user
        prev_trade = current_trade

    # Final column set and order; reindex adds any column that is still missing.
    derived_variables_columns = 'user_id, trade_id, trade_no, observation_number, date_open, date_closed, date_observed, views, views_previous, views_change, currency, lots, cum_day, cum_profit_in_pips, cum_trades, Tij, neg_rate_diff, 24hr_avg_rate, fx_rate_idx, gain, rank, rank_change, avg_rank_1m, cohort_min_rank, conc_transactions, conc_currencies, dispos_effect, followers, prev_followers, amount_following, total_follower_profit, gain_days, avg_slippage, log_garch'
    derived_variables_columns = derived_variables_columns.split(', ')

    derived_variables = derived_variables.reindex(columns=derived_variables_columns)

    # Missing values are written as '.'.
    # NOTE(review): regex=True makes 'NaN'/'nan' patterns rather than exact
    # values - confirm that substring matching is intended.
    derived_variables = derived_variables.replace([None, np.nan, '', 'NaN', 'nan'], '.', regex=True)

    return derived_variables

In [7]:
def dispos_effect_derived_variables(trading_func, overview_func, er_dict_func):
    """Build the per-observation table with the concurrency and disposition-effect columns.

    The observation rows come from ``create_derived_variables`` (fed with the
    scrape lookup built from ``overview_func``); each row is then passed,
    together with ``trading_func`` and ``er_dict_func``, to
    ``count_concurrent_trades_and_calculate_dispos_effect`` and its nine
    statistics are stored as columns.

    Returns
    -------
    pandas.DataFrame
        The observation rows plus the nine statistic columns, with missing
        values replaced by the string ``'.'``.
    """
    stat_columns = [
        'conc_transactions', 'conc_currencies',
        'n_open_gain', 'n_open_loss',
        'n_closed_gain', 'n_closed_loss',
        'prop_closed_gains', 'prop_closed_loss',
        'dispos_effect',
    ]

    scrapes_by_user = create_user_scrapes_dict(overview_func)
    observations = create_derived_variables(trading_func, scrapes_by_user)

    def stats_for(observation):
        return count_concurrent_trades_and_calculate_dispos_effect(observation, trading_func, er_dict_func)

    observations[stat_columns] = observations.apply(stats_for, axis=1)

    missing_markers = [None, np.nan, '', 'NaN', 'nan']
    return observations.replace(missing_markers, '.', regex=True)

In [8]:
unique_users = trading_history['user_id'].unique()

# Per-user result frames, in processing order
results = []
# Users whose processing raised; reported after the loop so that a gap in the
# saved file does not go unnoticed
failed_users = []

for user_id in tqdm(unique_users, desc="Processing users"):
    try:
        # Work on an independent copy of this user's trades
        mask = (trading_history['user_id'] == user_id)
        trading_dummy = trading_history[mask].copy().reset_index(drop=True)

        user_result = dispos_effect_derived_variables(trading_dummy, overview, exchange_rates_dict)
        results.append(user_result)

        # tqdm.write prints without breaking the progress bar into a new line
        tqdm.write(f"Processed user {user_id}")

    except Exception as e:
        # Best effort: keep going with the remaining users, but remember the failure
        failed_users.append(user_id)
        tqdm.write(f"Error processing user {user_id}: {e}")

if not results:
    raise ValueError("No user produced any rows; nothing to combine or save")

# Combine all results into a single DataFrame and keep the reported columns
final_result = pd.concat(results, ignore_index=True)
final_result = final_result[['user_id', 'trade_id', 'trade_no', 'observation_number', 'date_open', 'date_closed', 'date_observed', 'conc_transactions', 'conc_currencies', 'n_open_gain', 'n_open_loss', 'n_closed_gain', 'n_closed_loss', 'prop_closed_gains', 'prop_closed_loss', 'dispos_effect']]

final_csv_file_name = 'disposition_effect_all_parameters.csv'

# Save the final result to a CSV file
final_result.to_csv(final_csv_file_name, index=False)

print(f"Processing complete. Results saved to '{final_csv_file_name}'")
if failed_users:
    print(f"{len(failed_users)} user(s) could not be processed: {failed_users}")

Processing users:   0%|          | 1/410 [00:02<16:56,  2.49s/it]

Processed user 24632


Processing users:   0%|          | 2/410 [00:04<14:29,  2.13s/it]

Processed user 388129


Processing users:   1%|          | 3/410 [00:06<14:30,  2.14s/it]

Processed user 373754


Processing users:   1%|          | 4/410 [00:09<17:33,  2.59s/it]

Processed user 300553


Processing users:   1%|          | 5/410 [00:13<20:09,  2.99s/it]

Processed user 371337


Processing users:   1%|▏         | 6/410 [00:15<17:07,  2.54s/it]

Processed user 387661


Processing users:   2%|▏         | 7/410 [00:16<15:11,  2.26s/it]

Processed user 388105


Processing users:   2%|▏         | 8/410 [00:20<18:03,  2.69s/it]

Processed user 373440


Processing users:   2%|▏         | 9/410 [00:23<18:20,  2.75s/it]

Processed user 326740


Processing users:   2%|▏         | 10/410 [00:25<17:53,  2.68s/it]

Processed user 364239


Processing users:   3%|▎         | 11/410 [00:27<15:49,  2.38s/it]

Processed user 385427


Processing users:   3%|▎         | 12/410 [00:29<14:53,  2.24s/it]

Processed user 378386


Processing users:   3%|▎         | 13/410 [00:31<14:40,  2.22s/it]

Processed user 388712


Processing users:   3%|▎         | 14/410 [00:33<14:30,  2.20s/it]

Processed user 372916


Processing users:   4%|▎         | 15/410 [00:39<21:32,  3.27s/it]

Processed user 371823


Processing users:   4%|▍         | 16/410 [00:41<18:16,  2.78s/it]

Processed user 378855


Processing users:   4%|▍         | 17/410 [00:42<16:03,  2.45s/it]

Processed user 378804


Processing users:   4%|▍         | 18/410 [00:44<14:30,  2.22s/it]

Processed user 388926


Processing users:   5%|▍         | 19/410 [00:48<17:03,  2.62s/it]

Processed user 41016


Processing users:   5%|▍         | 20/410 [00:50<15:52,  2.44s/it]

Processed user 339117


Processing users:   5%|▌         | 21/410 [00:51<14:31,  2.24s/it]

Processed user 383057


Processing users:   5%|▌         | 21/410 [00:52<16:12,  2.50s/it]


KeyboardInterrupt: 

In [17]:
# Display the combined per-user result (pandas abbreviates long frames when rendering).
final_result

Unnamed: 0,user_id,trade_id,trade_no,observation_number,date_open,date_closed,date_observed,conc_transactions,conc_currencies,n_open_gain,n_open_loss,n_closed_gain,n_closed_loss,prop_closed_gains,prop_closed_loss,dispos_effect
0,24632,514812394,1,1,2019-10-14 20:11:11,2019-11-17 17:23:15,2019-10-21 22:31:00,3.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
1,24632,514812394,1,2,2019-10-14 20:11:11,2019-11-17 17:23:15,2019-10-22 22:25:00,3.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
2,24632,514812394,1,3,2019-10-14 20:11:11,2019-11-17 17:23:15,2019-10-23 22:25:00,3.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
3,24632,514812394,1,4,2019-10-14 20:11:11,2019-11-17 17:23:15,2019-10-27 22:25:00,3.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
4,24632,514812394,1,5,2019-10-14 20:11:11,2019-11-17 17:23:15,2019-10-28 22:25:00,3.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102427,378883,506804088,1,2,2019-07-16 04:56:55,2019-07-24 06:50:33,2019-07-24 06:50:33,3.0,1.0,0.0,0.0,3.0,0.0,1.0,0.0,1.0
102428,378883,506843815,2,1,2019-07-16 09:26:09,2019-07-24 06:50:33,2019-07-24 01:29:00,3.0,1.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0
102429,378883,506843815,2,2,2019-07-16 09:26:09,2019-07-24 06:50:33,2019-07-24 06:50:33,3.0,1.0,0.0,0.0,3.0,0.0,1.0,0.0,1.0
102430,378883,506733468,3,1,2019-07-15 12:07:44,2019-07-24 06:50:33,2019-07-24 01:29:00,3.0,1.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0


In [34]:
# Select the rows of final_result that belong to user 381158 and print them as plain text.
# NOTE(review): the recorded output lists 34 columns, whereas the processing
# cell above keeps 16 - presumably final_result came from a different run;
# re-run the notebook in order to confirm.
result = final_result.loc[final_result['user_id'] == 381158]
print(result)

Empty DataFrame
Columns: [user_id, trade_id, trade_no, observation_number, date_open, date_closed, date_observed, views, views_previous, views_change, currency, lots, cum_day, cum_profit_in_pips, cum_trades, Tij, neg_rate_diff, 24hr_avg_rate, fx_rate_idx, gain, rank, rank_change, avg_rank_1m, cohort_min_rank, conc_transactions, conc_currencies, dispos_effect, followers, prev_followers, amount_following, total_follower_profit, gain_days, avg_slippage, log_garch]
Index: []

[0 rows x 34 columns]
