In [None]:
'''

******Analytical Exercise
 
Banc ABC is a large US Bank that is protected by F5, Inc. They have approached F5 with a special request. They have set up special infrastructure in their environment to service financial aggregators like Mint and 
Plaid who login in to accounts on behalf of Banc ABC’s customers. Traditionally no attention has been paid to these aggregators and they have been allowed unfettered access to Banc ABC’s systems.

A new CISO has taken the reins at Banc ABC and has requested that we look at all the aggregator transactions and provide her with a report. She is specifically concerned about the rate at which aggregators are 
logging into accounts. She is proposing limiting each aggregator to 1 login per account per 10 min interval and would like to know what impact this will have on the aggregators.


Data Provided
You have been provided with two CSV files for the purpose of this exercise.

1.	Aggregator IP List.csv
Contains a list of all the allowlisted (whitelisted) IPs that belong to known financial aggregators that are allowed by Banc ABC to access their customer’s accounts. The format of the file is
IP: contains the IP addresses belonging to the aggregators that have been allowlisted
Aggregator: contains the name of the aggregator that owns the respective allowlisted IP address

2.	Login Transactions.csv
This file contains all the logins observed by F5 over a 24 hour period on Banc ABC’s special Aggregator ONLY endpoint. This endpoint was set up specifically to process transactions for the allowlisted aggregators so as not 
to interfere with regular customer traffic which uses a different endpoint.

The format of the file is:
IP: This is the IP address from which the login transaction came
LoginSuccess: This is an indicator of whether or not the login was successful i.e. had the correct accountName and password pair
AccountName: this is the account that the transaction was trying to log into
Timestamp: this is the epoch timestamp at which the respective login transaction was received
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Required
Produce a presentation for Banc ABC’s CISO and her team covering the following questions as well as any other insights you may deem important to bring to Banc ABC’s attention.
1.	Which aggregators are accessing Banc ABC’s systems?
2.	How much volume are the aggregators sending?
3.	How many individual user accounts are being accessed by the aggregators?
4.	What is the login success rate of these aggregators and is this in line with what would be expected in your opinion?
5.	What is the average number of transactions each aggregator sends per 10min interval?
6.	What is the maximum number of transactions each aggregator sends per 10min interval?
7.	What would be the impact of the CISO’s proposal to limit each aggregator to 1 login per account per 10 min interval?

Guidance
●	Feel free to use any analytical tools you feel comfortable with to perform the analysis and prepare the presentation for the CISO. We recommend, but do not require, use of Jupyter notebooks.
●	Be prepared to show your code or worksheets and to answer questions about your thought and analytical processes.
●	Presentation will be over Zoom to the Banc ABC CISO and will be scheduled for 1 hour, including questions.

You have 3 days to prepare the deck and email it to the Banc ABC CISO (please email directly to the Threat Analytics & Reporting team distro: tar@f5.com)


'''

In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt




In [6]:
# Load the data
aggregator_ip_list = pd.read_csv(r'C:\Users\ARMANDO\Documents\Python Scripts\Challenges\F5\Aggregator IP List.csv', index_col=0)
login_transactions = pd.read_csv(r'C:\Users\ARMANDO\Documents\Python Scripts\Challenges\F5\Login Transactions.csv', index_col=0)

print(login_transactions.dtypes)
print(aggregator_ip_list.dtypes)

print('done!')

done!


In [26]:
# lets see the overal info from files

print(aggregator_ip_list.describe().round(2))
print(login_transactions.describe().round(2))
#we can see IP \neq ip, so lets change to IP instead of ip


                  ip Aggregator
count            315        315
unique           315          7
top     54.208.59.10  YoungOnes
freq               1        249
                           timestamp
count                        4092253
mean   2018-10-28 17:04:39.293569536
min       2018-10-28 05:00:00.016000
25%       2018-10-28 10:50:54.060000
50%    2018-10-28 17:28:15.920999936
75%       2018-10-28 23:03:24.952000
max       2018-10-29 04:59:59.982000


In [27]:
aggregator_ip_list.rename(columns={'ip': 'IP'}, inplace=True)
aggregator_ip_list.head()

Unnamed: 0,IP,Aggregator
0,54.208.59.10,AWS
1,54.88.74.128,AWS
2,54.88.202.28,AWS
3,52.70.160.54,FinTech
8,52.44.118.176,FinTech


In [20]:

# Convert the timestamp from milliseconds to a datetime object for easier manipulation
# login_transactions['Timestamp'] = pd.to_datetime(login_transactions['Timestamp'], unit='ms')


ip            object
Aggregator    object
dtype: object

In [18]:

# Convert the timestamp to a datetime object for easier manipulation
login_transactions['timestamp'] = pd.to_datetime(login_transactions['timestamp'], unit='ms')
print('format complete!')

format complete!


In [19]:


# Merge the dataframes to add aggregator names to the login transactions
merged_data = login_transactions.merge(aggregator_ip_list, on='IP', how='left')

KeyError: 'IP'

In [None]:


# 1. Which aggregators are accessing Banc ABC’s systems?
aggregators_accessing = merged_data['Aggregator'].unique()

# 2. How much volume are the aggregators sending?
aggregator_volume = merged_data['Aggregator'].value_counts()

# 3. How many individual user accounts are being accessed by the aggregators?
individual_accounts = merged_data.groupby('Aggregator')['AccountName'].nunique()

# 4. What is the login success rate of these aggregators?
login_success_rate = merged_data.groupby('Aggregator')['LoginSuccess'].mean()

# 5. What is the average number of transactions each aggregator sends per 10min interval?
merged_data['TimeInterval'] = merged_data['Timestamp'].dt.floor('10T')
average_transactions_per_10min = merged_data.groupby(['Aggregator', 'TimeInterval']).size().groupby('Aggregator').mean()

# 6. What is the maximum number of transactions each aggregator sends per 10min interval?
max_transactions_per_10min = merged_data.groupby(['Aggregator', 'TimeInterval']).size().groupby('Aggregator').max()

# 7. Impact of limiting each aggregator to 1 login per account per 10 min interval
# Calculate the number of transactions per account per 10min interval
transactions_per_account_10min = merged_data.groupby(['Aggregator', 'AccountName', 'TimeInterval']).size()

# Count the number of times this exceeds 1 (i.e., the excess transactions)
excess_transactions = transactions_per_account_10min[transactions_per_account_10min > 1] - 1
impact_of_limitation = excess_transactions.groupby('Aggregator').sum()

# Prepare the results for presentation
results = {
    'Aggregators Accessing': aggregators_accessing,
    'Aggregator Volume': aggregator_volume,
    'Individual Accounts': individual_accounts,
    'Login Success Rate': login_success_rate,
    'Average Transactions per 10min': average_transactions_per_10min,
    'Max Transactions per 10min': max_transactions_per_10min,
    'Impact of Limitation': impact_of_limitation
}

# Display results
for key, value in results.items():
    print(f"{key}:\n{value}\n")



In [18]:
#Vamos a abrir los archivos para visualizarlos solamente:
agg.head()

Unnamed: 0,ip,Aggregator
0,54.208.59.10,AWS
1,54.88.74.128,AWS
2,54.88.202.28,AWS
3,52.70.160.54,FinTech
8,52.44.118.176,FinTech


In [15]:
import pandas as pd
tr=pd.read_csv(r'C:\Users\ARMANDO\Documents\Python Scripts\Challenges\F5\Login Transactions.csv', index_col=0 )
agg=pd.read_csv(r'C:\Users\ARMANDO\Documents\Python Scripts\Challenges\F5\Aggregator IP List.csv', index_col=0)
tr

Unnamed: 0,IP,LoginSuccess,AccountName,timestamp
0,54.208.59.10,Fail,756bb790d96873a,1540702800016
1,206.108.41.103,Success,7b3eb9367400c1b,1540702800022
2,216.34.61.115,Success,abfa625be56908a,1540702800041
3,52.44.118.176,Fail,dad678e942a965c,1540702800046
4,64.41.181.29,Success,e3014e35d5ec49b,1540702800067
...,...,...,...,...
4092248,216.34.61.225,Fail,8dd393aa2fc2d3e,1540789199913
4092249,216.34.61.167,Success,ac59730a398c7f0,1540789199929
4092250,54.208.59.10,Fail,33f59293e9e34eb,1540789199939
4092251,54.208.59.10,Fail,e0e0836a6e4833c,1540789199961


In [19]:
agg['Aggregator'].unique()

array(['AWS', 'FinTech', 'Insight', 'YoungOnes', 'PayTM', 'Funtown',
       'FunTown'], dtype=object)

In [20]:
agg['ip'].unique()

array(['54.208.59.10', '54.88.74.128', '54.88.202.28', '52.70.160.54',
       '52.44.118.176', '52.70.161.90', '52.0.123.48', '206.108.41.101',
       '206.108.41.102', '206.108.41.103', '34.230.198.253',
       '34.228.103.5', '34.230.198.27', '34.232.227.45', '206.108.41.105',
       '206.108.41.106', '206.108.41.107', '206.108.41.108',
       '64.41.181.183', '64.41.181.155', '64.41.181.105', '216.34.61.75',
       '216.34.61.165', '216.34.61.191', '216.34.61.157', '216.34.61.181',
       '64.41.181.177', '64.41.181.157', '216.34.61.115', '216.34.61.89',
       '64.41.181.215', '64.41.181.135', '216.34.61.155', '64.41.181.17',
       '216.34.61.183', '216.34.61.213', '216.34.61.133', '216.34.61.199',
       '216.34.61.99', '64.41.181.149', '216.34.61.167', '216.34.61.93',
       '216.34.61.193', '64.41.181.127', '216.34.61.109', '216.34.61.103',
       '64.41.181.93', '64.41.181.185', '216.34.61.229', '216.34.61.189',
       '64.41.181.43', '216.34.61.53', '64.41.181.97', '216.34.61