In [2]:
'''
NAME    : nick_IP_extraction
PURPOSE : Process raw Nick-IP pairs and add them to the existing repository.
          Takes input that has been extracted from the sidebar and from the
          search of apex-dc++.

FOR IMPORTING FROM SIDEBAR
- Input  : sidebar_input.csv
- Output : sidebar_output.csv

FOR IMPORTING FROM SEARCH
- Input  : search_input.csv
- Output : search_output.csv
'''

# Name of this notebook (without extension)
file_name = 'nick_IP_extraction'

# PRELIMINARY

In [3]:
'''IMPORTING LIBRARIES'''

import pandas as pd
import numpy as np
import os
from collections import defaultdict
from pathlib import Path

pd.set_option('display.max_rows', None)                                   

In [4]:
'''REMOVING PREVIOUS OUTPUT FILES'''

# Start from a clean slate: delete any output CSV left behind by an earlier run.
stale_outputs = ('sidebar_output.csv', 'search_output.csv')
for stale in map(Path, stale_outputs):
    if stale.is_file():
        os.remove(stale)

# SIDEBAR IMPORT

In [None]:
'''IMPORTING FROM SIDEBAR'''

# Load the raw sidebar export (one row per nick) into the "sidebar" dataframe
sidebar = pd.read_csv('sidebar_input.csv')
print("Number of Imported Nicks :", len(sidebar))

# Keep only the 'Nick' and 'Ip' columns and discard every row that lacks
# either value, so only nicks with a known IP remain
sidebar = sidebar.loc[:, ['Nick', 'Ip']].dropna(axis=0, how='any')

# Persist the cleaned table for the "selecting source" step
sidebar.to_csv('sidebar_output.csv', index=False)
print("Number of Valid Nicks :", len(sidebar))

# Sanity check: a literal 'Nick' value in the 'Nick' column would mean the
# header row was read as data
header_leaked = 'Nick' in set(sidebar['Nick'])
print("\nImporting mistake? :", header_leaked)                            # should return False

# Sanity check: no missing value may survive the dropna above
has_missing = sidebar.isnull().values.any()
print("Is any IP missing? :", has_missing)                                # should return False

sidebar.head()

# SEARCH IMPORT

In [None]:
'''IMPORTING FROM SEARCH'''

# Read input csv containing all nicks to "search" dataframe
search = pd.read_csv('search_input.csv')

# Keep only 'User' and 'Ip' from the raw input and rename 'User' to 'Nick'
# so the table has the same schema as the sidebar export
search = search[['User', 'Ip']].rename(columns={'User': 'Nick'})

# Drop rows without a nick or an IP (same rule as the sidebar import).
# A missing IP would otherwise reach the master table and break the hostel
# lookup, which slices the IP as a string. This runs BEFORE de-duplication so
# that a nick whose first row lacks an IP can still be kept via a later row.
search = search.dropna(axis=0, how='any')

# Filters out duplicate nicks (the first occurrence is kept)
search = search.drop_duplicates('Nick')

# Export "search" dataframe as a csv
search.to_csv('search_output.csv', index=False)

# Checks if 'Nick' is an entry under 'Nick' column
print("Importing mistake? :", 'Nick' in set(search['Nick']))              # should return False

print("Number of Nicks imported:", len(search))
search.head()

# SELECTING SOURCE

In [None]:
'''SELECTING UPDATE SOURCE'''

# Select source format between sidebar/search by setting source_kind to
# 'sidebar' or 'search' (instead of commenting/uncommenting read_csv lines).
source_kind = 'sidebar'

# Output file produced by each import section
source_files = {'sidebar': 'sidebar_output.csv',
                'search': 'search_output.csv'}

if source_kind not in source_files:
    raise ValueError("source_kind must be one of {}, got {!r}".format(
        sorted(source_files), source_kind))

source = pd.read_csv(source_files[source_kind])

print("Number of Nicks imported to source :", len(source))
source.head()

# HANDLING MASTER

In [None]:
'''INITIALISING MASTER'''
### NOTE: SKIP AFTER INITIAL TRIAL.

def make_master():
    """Return an empty master table with the columns 'Nick' and 'Ip'."""
    blank_columns = {label: [] for label in ('Nick', 'Ip')}
    return pd.DataFrame(blank_columns)

master = make_master()

In [3]:
'''RESTORE OLD MASTER'''
### NOTE: USE AFTER INITIAL TRIAL.

# Name of the most recent backup inside the 'nick_backup' folder
latest_backup_file = 'nick_backup_1.csv'

# Load that backup as the current master table
backup_path = os.path.join('nick_backup', latest_backup_file)
master = pd.read_csv(backup_path)

print("Number of Nicks in Restored Master :",len(master))
master.head()

Number of Nicks in Restored Master : 399


Unnamed: 0,Nick,Ip
0,Overlord,10.4.9.91
1,The_Doctor,10.4.9.65
2,tyrionisawesome,10.4.9.33
3,buckyball..,10.4.9.212
4,EnigMaXX,10.4.9.193


# UPDATING MASTER

In [None]:
'''UPDATING MASTER'''

# Keep a handle on the pre-update table; "master" is rebound below, so this
# name keeps pointing at the old data
master_old = master

# Stack the newly imported rows underneath the existing ones
combined = pd.concat([master, source], ignore_index=True)
combined.columns = ['Nick', 'Ip']

# Keep the first row per nick, i.e. entries already in the old master win
master = combined.drop_duplicates(subset='Nick')

# Optional: order the result with master.sort_values('Nick') or
# master.sort_values('Ip')

print("Length of Master Database before update :",len(master_old))
print("Length of Master Database after update  :",len(master))
master.head()

# BACKUP AND SUMMARY

In [None]:
'''BACKUP'''

# Provide backup_as
backup_as = 'nick_backup_2.csv'

# Make sure the backup folder exists; on a first run (master built from
# scratch) it may not have been created yet and to_csv would fail.
os.makedirs('nick_backup', exist_ok=True)

# Exporting the 'master' dataframe to csv file as a backup
master.to_csv(os.path.join('nick_backup',backup_as), index=False, header=True)

In [None]:
'''SUMMARY'''

# Determining the new items that have been added.
# Rows are matched on 'Nick' rather than on index labels: index labels only
# separate old from new rows while the old master has a contiguous 0..n-1
# index, which no longer holds once duplicates have been dropped from it.
new_added = master[~master['Nick'].isin(master_old['Nick'])]

print("No of new items added : ", len(new_added))
new_added.head()

# SEARCH FUNCTION

In [None]:
# Maps IP to hostel: key is the first three octets of the address
host = {"10.4.5"  : "AH1", 
        "10.4.6"  : "AH2", 
        "10.4.7"  : "AH3",
        "10.4.8"  : "AH4", 
        "10.4.9"  : "AH5", 
        "10.4.10" : "AH6",
        "10.4.11" : "AH7", 
        "10.4.12" : "AH8", 
        "10.3.9"  : "AH9",
        "10.3.10" : "CH1", 
        "10.3.11" : "CH1", 
        "10.3.12" : "CH2",
        "10.3.13" : "CH3", 
        "10.3.14" : "CH4", 
        "10.3.15" : "CH5/6",
        "10.3.16" : "CH5/6",
        "10.4.15" : "CH7",}
# Unknown prefixes resolve to 'Other' when indexed with host[...]
host = defaultdict(lambda: 'Other', host)

def get_host(ip):
    """Return the hostel for a given IP.

    Parameters
    ----------
    ip : str
        Dotted IPv4 address, e.g. '10.4.12.169'. Surrounding whitespace is
        ignored.

    Returns
    -------
    str
        The hostel mapped to the first three octets of ``ip``, or 'Other'
        when the prefix is unknown, the address has fewer than four
        dot-separated parts, or ``ip`` is not a string (e.g. a missing value
        read from a CSV).
    """
    if not isinstance(ip, str):
        return 'Other'

    octets = ip.strip().split('.')
    if len(octets) < 4:
        return 'Other'

    # Build the prefix from the octets themselves instead of a fixed character
    # offset, so it does not depend on how many digits each octet has.
    prefix = '.'.join(octets[:3])

    # .get() avoids the defaultdict side effect of inserting every unknown
    # prefix into `host` on lookup.
    return host.get(prefix, 'Other')

In [8]:
'''SEARCH FUNCTION - NICK'''
# Returns information about queried nick

# Enter nick to query
nick_query = "gazixxxx"

# Positional row numbers (0-based) of every row whose nick matches the query
ind = list(np.where(master['Nick'] == nick_query)[0])

# Checks if Ip is available for nick
print("Is Nick in Database? :", nick_query in set(master['Nick']))

if ind:
    # np.where yields POSITIONS, so the row must be read with .iloc; label
    # based access (master.Ip[pos]) picks the wrong row or raises once the
    # index has gaps, e.g. after drop_duplicates in the update step.
    ip = master['Ip'].iloc[ind[0]]
    print("\nDatabase Indice : " + str(ind[0]))
    print("IP of " + nick_query + "     : " + str(ip))
    print("Hostel of " + nick_query + " : " + get_host(ip))
else:
    # Unknown nick: report it instead of failing with an IndexError
    print("\nNo entry found for " + nick_query)

Is Nick in Database? : True

Database Indice : 69
IP of gazixxxx     : 10.4.12.169
Hostel of gazixxxx : AH8


In [7]:
'''SEARCH FUNCTION - HOSTEL'''
# Returns information about queried hostel

# Enter hostel to query
hostel_query = "AH8"

# Sorted copy of the master with a 'Hostel' column derived from each IP.
# The lookup goes through get_host so the IP-to-hostel rule lives in one
# place, and columns are addressed by name rather than by position.
trial = master.sort_values('Nick').assign(
    Hostel=lambda frame: frame['Ip'].map(get_host))

# Both names refer to the same annotated table, as before
host_link = trial

print("Nicks belonging to {} :".format(hostel_query))
for nick in host_link.loc[host_link['Hostel'] == hostel_query, 'Nick']:
    print("-", nick)

Nicks belonging to AH8 :
- A.I.M
- AgentP
- BalikaVadhu
- BertieWooster
- Bluten11
- ChotaBheemcyvhojk
- DIEfshsha
- DeFaLT
- Deathstroke
- Dexter
- EngiNerd
- GrimReoiukujbn
- Hallelujah
- Henry98
- LedZep
- Mandir_Yahi_Banega
- Mark42
- OnumBarr
- PehchanKaun??
- Primalr3d
- Primalr3d_ki_mamisss
- RAandome3sssss
- RajaBabu
- Rb7
- SestiRendi
- Sheldon@cooper
- SlySeeker
- Sol_Invictus
- Vanguard1
- aaargh
- bababkchod
- bat7
- bear
- chivi
- daishinkan
- dnbsa,dm.a
- fdsf
- fevikwik
- gazixxxx
- gloa
- goldameir
- insidious
- oggy
- simi_kk
- supreme
- wat
- zaheer
- ~katappa!~
