### Changelog 

- Separating **process_iplist()** output from VT calls to folders (todays_date)
- changed output of **process_json()** FROM "_parsed-combined" to todays_date_parsed-combined in EACH RESPECTIVE FOLDER 

### Flow

1. User edits and feeds ip.csv
2. **process_iplist()** reads each and calls VT api 
3. Responses are stored in **"downloaded_vtresponse"** and separated into one sub-folder per **run date** (YYYYMMDD)
4. **process_json()** reads jsons in each folder and generates a compilation for that day in each folder

### Questions / Todo List

**Update** 

I have managed to get the scripts running from jupyter notebook but have the following questions

(a) Receiving (via a web interface) either an individual or a list of domains / IP addresses --> currently the list is fed in via CSV; should I create a front-end for people to upload their files?

(b) Storing the list of domains / IP addresses in a queue-based list --> Is this the back-end of things? That is to say, this script runs in the back-end, and whenever files come in from the front-end they trigger the script?

(c) Carry out enrichment --> Where does this "processed in the previous X days" come from? From what I understand, I should create a check so that when new information comes in, it looks at the history of when the IP/domain was previously checked and continues / stops accordingly. Is that correct?

(d) Storing responses on disk and extracting a subset into a DB --> Is there a specific subset you'd like? DB-wise I would prefer to try NoSQL, as I have no experience with it!

In [4]:
import base64
import hashlib
import json
import requests
import time
import csv
import datetime
import os
import pandas as pd
from dateutil import tz
import pytz
from pymongo import MongoClient


# Blank record for one IP address: each field name below maps to an empty
# string until a real value is filled in.
json_template_ip = dict.fromkeys(
    (
        "ip_address",
        "whois_date",
        "last_analysis_date",
        "reputation",
        "last_analysis_stats",
        "total_votes",
        "as_owner",
        "country",
        "asn",
        "image",
        "processed_date",
        "target_geo_country",  # input from original excel
    ),
    "",
)

# VirusTotal API key, sent as the "x-apikey" request header.
# SECURITY: a key committed to source should be treated as leaked. Set the
# VT_API_KEY environment variable to override it; the literal below is kept
# only as a fallback so existing runs keep working, and should be rotated and
# removed. (A second, commented-out key was dropped from this spot.)
API_KEY = os.environ.get('VT_API_KEY') or '0d9fdb6e32d74b9d12e3d894309531838c3aabe8d66b049fd3a7976fbedf2c68'  #@param  {type: "string"}



# MongoDB connection to the server on this machine; `db` is the database
# handle the rest of the notebook writes to.
client = MongoClient(host='localhost', port=27017)
# db = client['d_ip_enrich']
db = client['filtered_sg_ip_list_day1']
    


def process_iplist(filename_to_process, columnIndex, x_days_ago):
    """Fetch a VirusTotal IP report for every data row of a CSV file.

    The first row of ``<filename_to_process>.csv`` is treated as the header.
    Each later row is looked up through the VirusTotal v3 ``ip_addresses``
    endpoint, unless ``to_skip`` reports that a response for the same address
    was already stored within the last ``x_days_ago`` days. Every response is
    saved as ``downloaded_vtresponse/<YYYYMMDD>/<address>.json`` and the
    outcome of every row is written to
    ``<filename_to_process>_tracker_<YYYYMMDD>.csv``.

    Args:
        filename_to_process: Path of the input CSV without the ``.csv`` suffix.
        columnIndex: Zero-based index of the column that holds the IP address.
        x_days_ago: Size, in days, of the look-back window for duplicates.

    Notes:
        * Column 1 of each row is copied into the stored JSON as
          ``target_geo_country``.
        * Processing stops at the first failed request or non-200 response;
          the tracker row records the reason.
    """
    print("======= process_iplist() START =======")

    response_root = "downloaded_vtresponse"

    # TODO: Make generalised and incorporate timestamp in foldername
    dt_string = datetime.datetime.now().strftime("%Y%m%d")
    todays_folder = os.path.join(response_root, dt_string)

    with open(filename_to_process + ".csv", newline='') as inputfile:

        # make dir to store API Responses (once, not once per row)
        os.makedirs(todays_folder, exist_ok=True)

        with open(filename_to_process + "_tracker_" + dt_string + ".csv", 'w', newline='') as outputfile:

            ip_list = csv.reader(inputfile, delimiter=',')
            output_writer = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

            def record(cells):
                # Flush after every row so the tracker is complete up to the
                # last handled address even if the run is interrupted.
                output_writer.writerow(cells)
                outputfile.flush()

            for counter, row in enumerate(ip_list):

                if counter == 0:
                    record(row + ["Time Run"] + ["Processed?"])
                    continue

                if not row:
                    # csv.reader yields [] for a blank line; nothing to look up.
                    continue

                # Use the caller-selected column everywhere (API call,
                # duplicate check and file name) so they cannot disagree.
                ip_address = row[columnIndex]

                print("Processing: #" + str(counter) + " - " + ip_address + " Country: " + row[1])

                # Check if IP was processed within x_days_ago; if yes, skip the API call
                if to_skip(ip_address, response_root, x_days_ago) != 0:
                    record(row + [datetime.datetime.now()] + ["Not Processed due to DUPLICATE in past "+ str(x_days_ago) + " days"])
                    continue

                # Get an IP address Report
                try:
                    r = requests.get(
                        "https://www.virustotal.com/api/v3/ip_addresses/" + ip_address,
                        headers={"x-apikey": API_KEY},
                        timeout=60,  # never hang forever on a stalled connection
                    )
                except requests.exceptions.RequestException as error:
                    print("Request failed: ", error, "please take a look")
                    record(row + [datetime.datetime.now()] + ["Not Processed due to request error: " + str(error)])
                    break

                ## Check status 200 = ok, 204 = exceeded, 400 = bad request, 403 = forbidden
                # if status != 200, will break all processing
                if r.status_code != 200:
                    print("Status Code: ",r.status_code, "please take a look" )
                    record(row + [datetime.datetime.now()] + ["Not Processed due to status_code: " + str(r.status_code)])
                    break

                report = r.json()
                # inputting target_geo_country from excel into JSON
                report['data']['attributes']['target_geo_country'] = row[1]

                with open(os.path.join(todays_folder, ip_address + ".json"), "w") as outfile:
                    outfile.write(json.dumps(report))

                record(row + [datetime.datetime.now()] + ["Processed"])

                # Pause between API calls -- presumably to stay under the
                # VirusTotal request quota; confirm before changing.
                time.sleep(16)

    print("======= process_iplist() END ======= \n\n")


# check if file exist in folder_to_process during x_days_ago, returns 0 or 1
def to_skip(filename, folder_to_process, x_days_ago):
    """Tell whether ``filename`` already has a stored response in the look-back window.

    ``folder_to_process`` is expected to contain one sub-folder per run date,
    named ``YYYYMMDD``. Every sub-folder whose name is on or after
    ``today - x_days_ago`` is searched for a file whose name, minus its last
    extension, equals ``filename``.

    Args:
        filename: Name to look for, without extension (e.g. ``"1.2.3.4"``).
        folder_to_process: Root folder holding the dated sub-folders.
        x_days_ago: Size of the look-back window in days.

    Returns:
        1 if a matching file exists in the window (a notice is printed),
        otherwise 0. A missing root folder counts as "nothing stored" (0).
    """
    cutoff = (datetime.datetime.now() - datetime.timedelta(days=x_days_ago)).strftime("%Y%m%d")

    if not os.path.isdir(folder_to_process):
        return 0

    for folder in sorted(os.listdir(folder_to_process)):
        folder_path = os.path.join(folder_to_process, folder)

        # target folders within X days range
        # NOTE: names are compared as strings, which orders dates correctly
        # only for YYYYMMDD-named folders.
        if folder < cutoff or not os.path.isdir(folder_path):
            continue

        for file in os.listdir(folder_path):
            ## if filename == target THEN SKIP + WRITE A NOTE
            if file.rsplit('.', 1)[0] == filename:
                print(f"file has been processed on {folder} which is <{x_days_ago} days ago, will skip API call")
                return 1

    return 0


    #     print(os.listdir("downloaded_vtresponse/"+ folder))    

def process_json_folder(folder_to_process, json_template=None):
    """Flatten every stored VirusTotal JSON in a folder into MongoDB and one CSV.

    For each ``*.json`` file directly inside ``folder_to_process`` a fresh copy
    of ``json_template`` is filled from ``data['data']['attributes']``:

    * keys ending in ``"date"`` are converted from epoch seconds to a
      timezone-aware Singapore ``datetime``;
    * ``"processed_date"`` falls back to the time this function started;
    * ``"ip_address"`` falls back to ``data['data']['id']``;
    * any other missing key keeps its template value and a notice is printed.

    Each row is inserted into the ``ip`` collection of the module-level ``db``
    and appended to a combined table written to
    ``<folder_to_process>/<DDMMYYYY>_parsed-combined.csv``.

    Args:
        folder_to_process: Folder containing the downloaded ``*.json`` files.
        json_template: Mapping of field name -> default value that defines
            which attributes are extracted. Defaults to the IP template.
    """
    print("======= process_json_folder() START =======")

    if json_template is None:
        json_template = {
            "ip_address": "",
            "whois_date": "",
            "last_analysis_date": "",
            "reputation": "",
            "last_analysis_stats": "",
            "total_votes": "",
            "as_owner": "",
            "country": "",
            "asn": "",
            "image": "",
            "processed_date": "",
            "target_geo_country": "",  ## input from original excel
        }

    # Get Date + Time to input later (stored as "processed_date" on each row)
    now = datetime.datetime.now(pytz.timezone("Singapore"))
    to_zone = tz.gettz('Singapore')

    # Usual Folder: downloaded_vtresponse
    frames = []

    for filename in sorted(os.listdir(folder_to_process)):
        path = os.path.join(folder_to_process, filename)

        # only regular files with a .json suffix are processed
        if not (os.path.isfile(path) and path.endswith(".json")):
            continue

        print("\n **** Processing:",path, "****")

        # The context manager closes the handle as soon as parsing finishes.
        with open(path) as json_file:
            data = json.load(json_file)

        # Copy the template per file: a shared dict would carry values (and
        # the "_id" added on insert) over from one file to the next.
        new_row = dict(json_template)

        print("json_template: ", json_template)
        print("Fresh new_row from json_template: ", new_row)

        # populate fields in JSON template
        for key in new_row:

            try:
                current_value = data['data']['attributes'][key]

                # replace epoch with legible date format for whois_date and last_analysis_date
                if key.endswith("date"):
                    current_value = datetime.datetime.fromtimestamp(current_value, tz=to_zone)

                new_row[key] = current_value

            except (KeyError, TypeError, ValueError, OverflowError, OSError) as e:

                if key == "processed_date":
                    new_row[key] = now
                    print("new_row[key]:", now)

                elif key == "ip_address":
                    new_row[key] = data['data']['id']

                else:
                    print(key,"not found with exception:",e)

        print("new_row:", new_row)
        # Insert before normalising, as before: pymongo adds the generated
        # "_id" to the dict, so it also ends up as a CSV column.
        db.ip.insert_one(new_row)

        frames.append(pd.json_normalize(new_row))

    # One concat at the end instead of re-copying the growing frame per file.
    combined_df = pd.concat(frames, ignore_index=True, sort=False) if frames else pd.DataFrame()

    # NOTE: the CSV name uses DDMMYYYY, unlike the YYYYMMDD response folders;
    # kept as-is so existing consumers of the file name keep working.
    dt_string = datetime.datetime.now(pytz.timezone("Singapore")).strftime("%d%m%Y")

    combined_df.to_csv(folder_to_process + '/' + dt_string + '_parsed-combined.csv')

    print("======= process_json_folder() END ======= \n\n")



# Step 1 - download: process the list of IPs from a CSV.
# Arguments: (CSV path without the ".csv" suffix, index of the IP column, x_days_ago)
# Earlier inputs, kept for reference:
# process_iplist("ip", 0,7)
# process_iplist("filtered_sg_ip_list_day1", 0,7)
# process_iplist("filtered_sg_ip_list_day2", 0,7)
process_iplist("filtered_sg_ip_list", 0,7)



# Step 2 - parse: process the downloaded VT JSONs (currently disabled).
# process_json_folder("downloaded_vtresponse_10Jan_combinedFull5k")
# process_json_folder("downloaded_vtresponse/28022023",json_template_ip)


print("completed")
# NOTE(review): exit() is the interactive helper installed by the site module;
# consider sys.exit(0), or dropping the call, if this becomes a plain script.
exit(0)





Processing: #1 - 13.227.254.94 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2 - 13.227.254.99 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #3 - 23.200.24.43 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #4 - 17.248.164.108 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #5 - 13.33.100.61 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #6 - 52.76.228.161 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #7 - 173.194.22.169 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #8 - 18.142.201.37 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #9 - 192.166.246.143 Country

Processing: #82 - 17.248.164.12 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #83 - 54.192.150.36 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #84 - 165.21.111.145 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #85 - 20.190.163.29 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #86 - 23.5.165.18 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #87 - 3.1.15.205 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #88 - 17.248.164.82 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #89 - 101.33.26.124 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #90 - 103.151.126.72 Cou

Processing: #162 - 157.240.13.174 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #163 - 13.33.33.38 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #164 - 161.117.32.237 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #165 - 13.33.92.224 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #166 - 45.40.48.215 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #167 - 13.227.248.101 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #168 - 157.240.13.14 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #169 - 23.47.190.131 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #170 - 161.11

Processing: #244 - 125.56.199.9 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #245 - 13.33.33.60 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #246 - 23.47.190.123 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #247 - 203.116.175.115 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #248 - 17.248.164.198 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #249 - 194.233.94.209 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #250 - 52.98.65.2 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #251 - 52.84.251.97 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #252 - 52.84.25

Processing: #329 - 13.33.78.115 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #330 - 23.54.56.210 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #331 - 52.74.138.239 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #332 - 54.192.150.59 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #333 - 111.223.64.57 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #334 - 51.79.223.17 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #335 - 104.28.156.111 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #336 - 47.241.41.246 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #337 - 17.253.

Processing: #399 - 17.253.61.198 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #400 - 8.212.12.137 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #401 - 184.87.202.204 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #402 - 20.43.132.130 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #403 - 18.136.234.192 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #404 - 13.33.91.132 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #405 - 52.84.251.98 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #406 - 17.248.164.15 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #407 - 184.26

Processing: #467 - 13.33.88.49 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #468 - 49.245.62.228 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #469 - 103.167.26.35 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #470 - 20.197.107.0 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #471 - 202.166.127.86 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #472 - 101.33.26.253 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #473 - 101.100.211.131 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #474 - 47.88.251.182 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #475 - 188.4

Processing: #543 - 18.141.61.240 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #544 - 20.190.163.18 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #545 - 23.210.250.147 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #546 - 51.79.147.148 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #547 - 13.33.100.113 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #548 - 118.215.87.103 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #549 - 3.0.71.181 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #550 - 3.0.114.63 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #551 - 13.227.25

Processing: #624 - 13.33.88.71 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #625 - 150.109.90.100 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #626 - 203.116.175.25 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #627 - 203.116.175.81 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #628 - 203.116.175.137 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #629 - 17.253.61.215 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #630 - 23.52.112.60 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #631 - 43.132.80.51 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #632 - 202.

Processing: #763 - 8.214.75.157 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #764 - 96.17.96.22 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #765 - 103.229.206.240 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #766 - 52.76.222.7 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #767 - 13.33.88.14 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #768 - 13.33.88.64 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #769 - 58.182.130.183 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #770 - 17.248.154.48 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #771 - 23.215.7.74

Processing: #839 - 219.75.18.254 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #840 - 119.81.220.166 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #841 - 157.240.7.54 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #842 - 104.83.196.24 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #843 - 23.205.209.29 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #844 - 23.50.95.2 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #845 - 150.109.90.126 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #846 - 116.14.234.24 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #847 - 58.182.

Processing: #908 - 23.54.56.167 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #909 - 96.17.96.26 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #910 - 23.52.112.203 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #911 - 13.213.200.29 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #912 - 172.217.194.100 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #913 - 139.99.89.178 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #914 - 13.33.33.127 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #915 - 54.179.102.83 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #916 - 89.187.

Processing: #998 - 18.136.10.134 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #999 - 17.248.164.81 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1000 - 104.65.228.244 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1001 - 129.227.82.184 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1002 - 139.99.57.5 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1003 - 103.115.78.177 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1004 - 42.60.143.143 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1005 - 23.200.24.80 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1006 

Processing: #1089 - 139.99.63.197 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1090 - 43.152.142.228 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1091 - 106.10.236.37 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1092 - 23.200.24.72 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1093 - 52.84.251.92 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1094 - 118.215.80.93 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1095 - 13.33.33.217 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1096 - 91.108.56.158 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1097 

Processing: #1179 - 116.0.81.225 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1180 - 101.33.26.230 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1181 - 203.116.175.177 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1182 - 45.40.48.214 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1183 - 155.69.19.131 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1184 - 54.192.150.110 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1185 - 47.241.171.65 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1186 - 157.240.13.32 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #11

Processing: #1283 - 74.125.12.231 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1284 - 103.11.189.158 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1285 - 23.44.2.67 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1286 - 116.0.81.223 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1287 - 156.146.56.83 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1288 - 46.137.226.162 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1289 - 101.33.26.231 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1290 - 157.240.235.34 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1291

Processing: #1385 - 13.227.254.105 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1386 - 111.223.64.81 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1387 - 54.151.248.105 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1388 - 17.248.154.4 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1389 - 104.69.36.150 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1390 - 103.107.199.163 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1391 - 54.254.242.156 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1392 - 17.248.164.109 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing:

Processing: #1534 - 23.26.222.191 Country: SG
Processing: #1535 - 54.169.52.7 Country: SG
Processing: #1536 - 54.169.250.61 Country: SG
Processing: #1537 - 8.219.3.226 Country: SG
Processing: #1538 - 52.74.13.34 Country: SG
Processing: #1539 - 17.248.164.48 Country: SG
Processing: #1540 - 218.212.69.57 Country: SG
Processing: #1541 - 51.79.157.178 Country: SG
Processing: #1542 - 178.128.219.162 Country: SG
Processing: #1543 - 52.84.251.3 Country: SG
Processing: #1544 - 101.32.104.177 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #1545 - 13.33.100.12 Country: SG
Processing: #1546 - 13.33.88.32 Country: SG
Processing: #1547 - 46.51.223.235 Country: SG
Processing: #1548 - 74.125.200.108 Country: SG
Processing: #1549 - 129.227.71.43 Country: SG
Processing: #1550 - 156.146.56.133 Country: SG
Processing: #1551 - 17.248.164.18 Country: SG
Processing: #1552 - 184.51.6.218 Country: SG
Processing: #1553 - 43.156.222.58 Country: SG
Processing

Processing: #1688 - 47.246.58.230 Country: SG
Processing: #1689 - 128.116.50.3 Country: SG
Processing: #1690 - 52.84.251.13 Country: SG
Processing: #1691 - 20.212.97.243 Country: SG
Processing: #1692 - 116.0.81.224 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1693 - 51.79.164.22 Country: SG
Processing: #1694 - 184.27.123.179 Country: SG
Processing: #1695 - 118.215.84.129 Country: SG
Processing: #1696 - 40.126.35.129 Country: SG
Processing: #1697 - 23.44.1.103 Country: SG
Processing: #1698 - 122.248.197.40 Country: SG
Processing: #1699 - 52.221.123.44 Country: SG
Processing: #1700 - 13.33.33.79 Country: SG
Processing: #1701 - 17.248.164.75 Country: SG
Processing: #1702 - 74.125.68.149 Country: SG
Processing: #1703 - 43.156.222.200 Country: SG
Processing: #1704 - 129.150.48.1 Country: SG
Processing: #1705 - 119.8.183.29 Country: SG
Processing: #1706 - 52.77.181.87 Country: SG
Processing: #1707 - 72.247.81.35 Country: SG
Processing:

Processing: #1839 - 18.139.149.124 Country: SG
Processing: #1840 - 13.33.88.93 Country: SG
Processing: #1841 - 103.229.10.171 Country: SG
Processing: #1842 - 18.136.136.241 Country: SG
Processing: #1843 - 52.84.251.127 Country: SG
Processing: #1844 - 43.132.80.47 Country: SG
Processing: #1845 - 104.69.47.124 Country: SG
Processing: #1846 - 13.227.253.21 Country: SG
Processing: #1847 - 23.200.29.179 Country: SG
Processing: #1848 - 13.33.88.56 Country: SG
Processing: #1849 - 157.240.235.18 Country: SG
Processing: #1850 - 172.217.194.106 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1851 - 47.246.58.206 Country: SG
Processing: #1852 - 159.138.104.96 Country: SG
Processing: #1853 - 52.84.251.96 Country: SG
Processing: #1854 - 142.251.85.74 Country: SG
Processing: #1855 - 142.251.88.71 Country: SG
Processing: #1856 - 23.200.29.35 Country: SG
Processing: #1857 - 46.137.210.166 Country: SG
Processing: #1858 - 17.248.164.7 Country: SG
Pro

In [9]:
# Blank record covering both IP and domain fields; every field starts as an
# empty string.
json_template_general = {
    field: ""
    for field in (
        "DNS",
        "Whois",
        "whois_date",  # CONVERT FROM EPOCH TO USER FRIENDLY DATE
        "last_analysis_date",
        "creation_date",
        "reputation",
        "registrar",
        "last_analysis_stats",  # SEPERATE INTO 5 COLUMNS?
        "last_https_certificate",
        "categories",
        "total_votes",
        "as_owner",
        "country",
        "asn",
        "download_archived_page",
        "image",
        "processed_date",  # OWN FIELD TO CHECK X+7 DAYS
    )
}

In [2]:
# Empty IP record: each listed field is initialised to "".
json_template_ip = {
    field: ""
    for field in (
        "ip_address",
        "whois_date",
        "last_analysis_date",
        "reputation",
        "last_analysis_stats",
        "total_votes",
        "as_owner",
        "country",
        "asn",
        "image",
        "processed_date",
        "target_geo_country",  # input from original excel
    )
}

In [15]:
# Blank record for one domain; every field starts as an empty string.
json_template_domain = dict.fromkeys(
    (
        "DNS",
        "Whois",
        "whois_date",
        "last_analysis_date",
        "creation_date",
        "reputation",
        "registrar",
        "last_analysis_stats",
        "last_https_certificate",
        "categories",
        "total_votes",
        "download_archived_page",
        "image",
        "processed_date",
    ),
    "",
)

In [19]:
# Overwrite every value of the template with the placeholder 1; the keys and
# their order stay as they are.
json_template_ip.update(dict.fromkeys(json_template_ip, 1))

# Bare expression so the notebook displays the result.
json_template_ip

{'whois_date': 1,
 'last_analysis_date': 1,
 'reputation': 1,
 'last_analysis_stats': 1,
 'total_votes': 1,
 'as_owner': 1,
 'country': 1,
 'asn': 1,
 'image': 1}

In [36]:
class ip_template:
    """Scratch record type used to try out an object -> JSON round trip.

    The constructor still takes placeholder fields (roll_no, name, batch); the
    commented-out list below holds the IP fields from an earlier draft.
    """

    # class attribute
#     ip_address = ""
#     whois_date = ""
#     last_analysis_date = ""
#     reputation= ""
#     last_analysis_stats=""
#     total_votes= ""
#     as_owner=""
#     country= ""
#     asn=""
#     image=""
#     processed_date=""
#     target_geo_country="" 
    def __init__(self, roll_no, name, batch):
        self.roll_no = roll_no
        self.name = name
        self.batch = batch
    
new_row = ip_template('','','')
# new_row['sg']
# new_row.country="sg"
# Serialise the instance attributes to a JSON string ...
new_row_str =json.dumps(new_row.__dict__)
# ... and parse that same string back into a plain dict. (The previous version
# passed the not-yet-defined new_row_json here and ended on a stray name `a`.)
new_row_json = json.loads(new_row_str)
new_row_json
# new_row_json
# type(new_row_json)
# # create parrot1 object
# parrot1 = Parrot()
# parrot1.name = "Blu"
# parrot1.age = 10

# # create another object parrot2
# parrot2 = Parrot()
# parrot2.name = "Woo"
# parrot2.age = 15

# # access attributes
# print(f"{parrot1.name} is {parrot1.age} years old")
# print(f"{parrot2.name} is {parrot2.age} years old")

{'roll_no': '', 'name': '', 'batch': ''}

In [None]:
# Template of the fields kept for one IP address, all initialised to "".
json_template_ip = dict.fromkeys(
    [
        "ip_address",
        "whois_date",
        "last_analysis_date",
        "reputation",
        "last_analysis_stats",
        "total_votes",
        "as_owner",
        "country",
        "asn",
        "image",
        "processed_date",
        "target_geo_country",  # input from original excel
    ],
    "",
)

In [1]:
import base64
import hashlib
import json
import requests
import time
import csv
import datetime
import os
import pandas as pd
from dateutil import tz
import pytz
from pymongo import MongoClient

# VirusTotal API key, sent in the "x-apikey" request header.
API_KEY = '207349263f9c5edd176cc079fa8000a5ab912df7d9e91154842c08031658675d'  #@param  {type: "string"}
# One-off probe: request the VirusTotal v3 report for the address 1.1.1.1.
r = requests.get(
    "https://www.virustotal.com/api/v3/ip_addresses/1.1.1.1",
    headers={"x-apikey": API_KEY},
)



In [14]:
# Display the HTTP status code of the response object `r` (bare expression,
# so the notebook shows it as the cell output).
# r.content

r.status_code
# site_response = str(r.content)
# print(site_response)

200

In [3]:
# Look-back window in days, used by the message below.
x_days_ago = 7

# Preview of the one-element status list for a duplicate row.
["".join(("Not Processed due to DUPLICATE in past ", str(x_days_ago), " days"))]

['Not Processed due to DUPLICATE in past 7 days']