# Extract AntiVirus Classification

Takes the headers generated in part 0 and creates a matching CSV with one column per antivirus (AV) engine, holding that engine's classification of each sample.

This notebook uses Celery, so make sure a worker is running (`celery -A tasks worker --loglevel=info`) before executing the cells.

In [88]:
import numpy as np
import pandas as pd
import datetime
from IPython.display import display
from tasks import extract_av_classification
from celery import group

# Load the sample headers and index them by their parsed date.
pe32 = pd.read_csv('data/pe32_samples.csv')
pe32['date'] = pd.to_datetime(pe32['date'], format='%Y/%m/%d')
# Set date as index
pe32 = pe32.set_index('date')

# Only the first 1000 samples are processed.
pe32 = pe32[:1000]
# Number of Celery tasks the samples are spread across.
n_tasks = 1
# Split row positions (not the frame itself) into n_tasks near-equal chunks,
# then slice the frame with them; each bucket is still a DataFrame.
buckets = [pe32.iloc[rows] for rows in np.array_split(np.arange(len(pe32)), n_tasks)]

print('[{0}] Sending tasks...'.format(datetime.datetime.now()))
# One task signature per bucket, each receiving that bucket's `link` values.
jobs = group([extract_av_classification.s(b.link.values.tolist()) for b in buckets])
# The group is only dispatched by apply_async(), so the "Done sending"
# message must come after this call for its timestamp to be meaningful.
result = jobs.apply_async()
print('[{0}] Done sending, waiting...'.format(datetime.datetime.now()))
# Block until every task in the group has finished.
result.join()
print('[{0}] All tasks done.'.format(datetime.datetime.now()))

[2017-03-10 20:05:34.404187] Sending tasks...
[2017-03-10 20:05:34.404547] Done sending, waiting...
[2017-03-10 20:05:48.918205] All tasks done.


In [89]:
# Flatten every task's output into two parallel lists and build the frame
# once, instead of re-concatenating a growing DataFrame on each iteration.
# Each task result is expected to be a sequence of (link, classification)
# pairs, as implied by the original column-0 / column-1 indexing; a task that
# returns no pairs simply contributes no rows.
links = []
classifications = []
for task_result in result.get():
    for link, classification in task_result:
        links.append(link)
        classifications.append(classification)

# Rows are indexed by link; columns come from the classification records.
temp = pd.DataFrame(data=classifications, index=links)

display(temp)

Unnamed: 0,a-squared,agnitum,ahnlab-v3,antivir,antivir7,antiy-avl,authentium,avast,avast5,avg,...,symantec,thehacker,totaldefense,trendmicro,trendmicro-housecall,una,vba32,vipre,virobot,virusbuster
YTllNThjNjY1MzRlNGFiYzg5ZjYzM2ExOWI3MzNjNTI,,i-worm.chir.b,win32/chihack.worm.10748,w32/chir.b,,worm/win32.runouce.b,,win32:runonce [trj],,win32/chir.b@mm,...,w32.chir.b@mm,w32/chir.b.dannado,win32/chir.b,pe_chir.b-o,pe_chir.b-o,,worm.runouce,win32.chir.b (v),win32.chir.b,
MTRiZjc0YTA2MmYwNDA2NDk4MDA1YzU2NzJkY2ZkYjc,,clean,clean,clean,,clean,,clean,,clean,...,clean,clean,clean,clean,clean,,clean,clean,clean,
ODdkNWUzNDQ3ZGI4NDUzYmIyZmY1ZDU2NmI5NWJiYmU,,clean,clean,clean,,clean,,js:agent-bpa [trj],,clean,...,clean,clean,win32/jorik.kj,clean,hv_gatrinew_ca223f34.tomc,,clean,clean,clean,
ZDNmZWIwMzBlODhiNDk1MDhhODgyNmI3YTc3NDQzYmI,,clean,clean,clean,,clean,,clean,,win32/cryptor,...,clean,clean,clean,clean,clean,,clean,clean,clean,
MTQ5YjJkZGE1YjI5NDJlYjhkOTEzMTlmYzE5MjlmMDc,,worm.vobfus!zhmlddpxupq,clean,tr/dropper.gen,,clean,,win32:malware-gen,,dropper.generic8.ra,...,w32.changeup!gen36,trojan/pronny.kt,clean,worm_vobfus.smkt,troj_gen.f47v0412,,worm.autorun,trojan.win32.generic.pak!cobra,clean,
N2UyMzk1MjZmMWE2NGY5ZGExNzJmMDgzNTZkZWIyMDg,,,,,,,,,,,...,,,,,,,,,,
MmMzODhhNDM1NWE4NGRmMmE1MDc3YjYyMzBmZjFjZGQ,,clean,clean,tr/dropper.gen2,,clean,,clean,,clean,...,clean,clean,clean,clean,clean,,clean,clean,clean,
ZTA5Yjg4NDkxYWYwNDRjYjhlZDJhODczMzNhNzUwNGY,,,,,,,,,,,...,,,,,,,,,,
ZDgxZDdlYTQ3MjI1NGE2ZmJjZjNkN2QyYWQ5OWM1MzU,,,clean,tr/ransom.fl.61,,trojan/win32.blocker.gen,,win32:smokeldr-e [trj],,clean,...,clean,trojan/blocker.gvk,,troj_gen.r47c7cf,troj_gen.r47c7cf,,hoax.blocker.gvk,trojan.win32.generic!bt,clean,trojan.blocker!snqpjoap968
MTMzY2VkZDdiYzQ2NDYyZDg2Y2FkMzZkZmFhNzk2YjY,backdoor.win32.udr!ik,,win-trojan/udr.47104,bds/udr.a,,backdoor/win32.win32.gen,w32/backdoorx.gmx,win32:trojan-gen {other},,backdoor.generic7.yaf,...,backdoor.trojan,backdoor/udr,,bkdr_newheur.iz,,,backdoor.win32.udr,,backdoor.win32.udr.692018,backdoor.agent.ethv
