### Changelog 

- Separating **process_iplist()** output from VT calls to folders (todays_date)
- changed output of **process_json()** FROM "_parsed-combined" to todays_date_parsed-combined in EACH RESPECTIVE FOLDER 

### Flow

1. User edits and feeds ip.csv
2. **process_iplist()** reads each and calls VT api 
3. Responses are stored in **"downloaded_vtresponse"** and separated into folders by **respective dates** (YYYYMMDD)
4. **process_json()** reads jsons in each folder and generates a compilation for that day in each folder

### Questions / Todo List

**Update** 

I have managed to get the scripts running from a Jupyter notebook, but I have the following questions:

(a) Receiving (via a web interface) either an individual or a list of domains / IP addresses --> currently it's fed via CSV; should I create a front-end for people to upload their files?

(b) Storing list of domains / IP address into a queue based list --> Is this the back-end of things? That is to say, this script runs in the back-end and whenever files come in from front-end it'll trigger the script?

(c) Carry out enrichment --> Where does this "processed in the previous X days" come from? From what I understand, I should create a check such that when new information comes in, it looks at the previous history of when the IP/domain was checked, and continues / stops accordingly. Is that correct?

(d) Storing responses on disk and extracting a subset into a DB --> is there a specific subset you'd like? DB-wise I would prefer to try NoSQL as I have no experience with it!

In [1]:
import base64
import hashlib
import json
import requests
import time
import csv
import datetime
import os
import pandas as pd
from dateutil import tz
import pytz
from pymongo import MongoClient


# Blank record describing one enriched IP address.
# Every field starts out as an empty string; the key order below is the
# order in which the fields appear in the record.
json_template_ip = dict.fromkeys(
    (
        "ip_address",
        "whois_date",
        "last_analysis_date",
        "reputation",
        "last_analysis_stats",
        "total_votes",
        "as_owner",
        "country",
        "asn",
        "image",
        "processed_date",
        "target_geo_country",  # input from original excel
    ),
    "",
)

# VirusTotal API key, sent as the "x-apikey" header on every report request.
# NOTE(security): a key committed to source control should be treated as
# leaked - rotate it and provide the replacement through the VT_API_KEY
# environment variable. The literal is kept only as a fallback so that
# existing runs keep working unchanged.
# API_KEY = '0d9fdb6e32d74b9d12e3d894309531838c3aabe8d66b049fd3a7976fbedf2c68'  #@param  {type: "string"}
API_KEY = os.environ.get("VT_API_KEY", '207349263f9c5edd176cc079fa8000a5ab912df7d9e91154842c08031658675d')



# Local MongoDB instance; parsed IP records are inserted into this database.
client = MongoClient('localhost',27017)
# db = client['d_ip_enrich']
db = client['filtered_sg_ip_list_day1']
    


def process_iplist(filename_to_process, columnIndex, x_days_ago):
    """Fetch a VirusTotal report for every IP listed in ``<filename_to_process>.csv``.

    The first CSV row is treated as a header. For each following row the IP in
    column ``columnIndex`` is checked with ``to_skip``; when no report for it is
    found in a recent dated folder, the VirusTotal v3 ``ip_addresses`` endpoint
    is called and the JSON answer is written to
    ``downloaded_vtresponse/<YYYYMMDD>/<ip>.json``. Every row is echoed to
    ``<filename_to_process>_tracker_<YYYYMMDD>.csv`` together with a timestamp
    and the outcome for that row.

    Args:
        filename_to_process: Path of the input CSV without the ``.csv`` suffix.
        columnIndex: Zero-based index of the column that holds the IP address.
        x_days_ago: Look-back window in days handed to ``to_skip``.

    Notes:
        * Column 1 of each row is read as the target country and stored in the
          saved report under ``data.attributes.target_geo_country``.
        * Processing stops at the first response whose status is not 200, or
          at the first request that fails at the network level; the reason is
          recorded in the tracker file.
        * Blank CSV rows are ignored.
    """
    print("======= process_iplist() START =======")

    # TODO: Make generalised and incorporate timestamp in foldername
    dt_string = datetime.datetime.now().strftime("%Y%m%d")
    response_root = "downloaded_vtresponse"
    todays_folder = response_root + "/" + dt_string

    # Pause after each successful call so consecutive requests are spaced out.
    seconds_between_calls = 16
    # Without a timeout requests.get() can block forever on a stalled connection.
    request_timeout = 30

    with open(filename_to_process + ".csv", newline='') as inputfile, \
            open(filename_to_process + "_tracker_" + dt_string + ".csv", 'w', newline='') as outputfile:

        # One call creates both the root and today's folder; done once instead
        # of re-checking on every row.
        os.makedirs(todays_folder, exist_ok=True)

        ip_list = csv.reader(inputfile, delimiter=',')
        output_writer = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        header = next(ip_list, None)
        if header is not None:
            output_writer.writerow(header + ["Time Run"] + ["Processed?"])
            outputfile.flush()

        for counter, row in enumerate(ip_list, start=1):

            if not row:
                # csv.reader yields [] for blank lines; there is nothing to look up.
                continue

            # The same column is used for the log line, the duplicate check,
            # the API call and the output file name.
            ip_address = row[columnIndex]

            print("Processing: #" + str(counter) + " - " + ip_address + " Country: " + row[1])

            # Check if IP was processed within x_days_ago, if yes, skip the API call
            if to_skip(ip_address, response_root, x_days_ago) != 0:
                output_writer.writerow(row + [datetime.datetime.now()] + ["Not Processed due to DUPLICATE in past "+ str(x_days_ago) + " days"] )
                outputfile.flush()
                continue

            # Get an IP address Report
            try:
                response = requests.get(
                    "https://www.virustotal.com/api/v3/ip_addresses/" + ip_address,
                    headers={"x-apikey": API_KEY},
                    timeout=request_timeout,
                )
            except requests.RequestException as error:
                print("Request failed: ", error, "please take a look")
                output_writer.writerow(row + [datetime.datetime.now()] + ["Not Processed due to request error: " + str(error)])
                outputfile.flush()
                break

            ## Check status 200 = ok, 204 = exceeded, 400 = bad request, 403 = forbidden
            # if status != 200, will break all processing
            if response.status_code != 200:
                print("Status Code: ",response.status_code, "please take a look" )
                output_writer.writerow(row + [datetime.datetime.now()] + ["Not Processed due to status_code: " + str(response.status_code)])
                outputfile.flush()
                break

            report = response.json()
            # inputting target_geo_country from excel into JSON
            report['data']['attributes']['target_geo_country'] = row[1]

            with open(todays_folder + "/" + ip_address + ".json", "w") as outfile:
                json.dump(report, outfile)

            output_writer.writerow(row + [datetime.datetime.now()] + ["Processed"])
            outputfile.flush()
            time.sleep(seconds_between_calls)

    print("======= process_iplist() END ======= \n\n")
#                 time.sleep(16)


# check if file exist in folder_to_process during x_days_ago, returns 0 or 1
def to_skip(filename, folder_to_process, x_days_ago):
    """Tell whether ``filename`` was already saved in a recent dated sub-folder.

    ``folder_to_process`` is expected to contain one sub-folder per day, named
    ``YYYYMMDD``. Sub-folders dated within the last ``x_days_ago`` days
    (cutoff day included) are scanned for a file whose name, minus its last
    extension, equals ``filename``.

    Args:
        filename: Name to look for, without extension (e.g. an IP address).
        folder_to_process: Directory holding the dated sub-folders.
        x_days_ago: Size of the look-back window in days.

    Returns:
        1 if a matching file exists inside the window, otherwise 0. Also 0
        when ``folder_to_process`` does not exist.
    """
    if not os.path.isdir(folder_to_process):
        # Nothing has been downloaded yet, so nothing can be a duplicate.
        return 0

    cutoff = (datetime.datetime.now() - datetime.timedelta(days=x_days_ago)).strftime("%Y%m%d")

    # Sorted so the reported folder is deterministic when several match.
    for folder in sorted(os.listdir(folder_to_process)):
        folder_path = os.path.join(folder_to_process, folder)

        # Stray files (tracker CSVs, .DS_Store, ...) are not date folders.
        if not os.path.isdir(folder_path):
            continue

        # Only genuine YYYYMMDD names take part in the comparison below;
        # otherwise e.g. a legacy DDMMYYYY folder such as "28022023" would
        # sort after every real cutoff string and always look "recent".
        if len(folder) != 8 or not folder.isdigit():
            continue
        try:
            datetime.datetime.strptime(folder, "%Y%m%d")
        except ValueError:
            continue

        # YYYYMMDD strings order the same way as the dates they encode.
        if folder < cutoff:
            continue

        for file in os.listdir(folder_path):
            ## if filename == target THEN SKIP + WRITE A NOTE
            if file.rsplit('.', 1)[0] == filename:
                print(f"file has been processed on {folder} which is <{x_days_ago} days ago, will skip API call")
                return 1

    return 0


    #     print(os.listdir("downloaded_vtresponse/"+ folder))    

def process_json_folder(folder_to_process, json_template=None):
    """Flatten the VirusTotal IP reports of one folder into MongoDB and a CSV.

    Every ``*.json`` file directly inside ``folder_to_process`` is loaded and
    reduced to the keys of ``json_template``. Each resulting record is inserted
    into ``db.ip`` and all records are written together to
    ``<folder_to_process>/<DDMMYYYY>_parsed-combined.csv``.

    Args:
        folder_to_process: Folder containing the downloaded report files.
        json_template: Mapping whose keys select the fields to extract and
            whose values are the defaults for fields missing from a report.
            It is copied for every report and never modified. When omitted,
            the standard IP template is used.

    Field rules:
        * A key is read from ``data.attributes`` of the report.
        * Keys ending in ``date`` are epoch seconds and are converted to
          timezone-aware datetimes in the Singapore zone.
        * ``processed_date`` falls back to the time this function started.
        * ``ip_address`` falls back to ``data.id``.
        * Any other key that is missing keeps its template default and a
          message is printed.
    """
    print("======= process_json_folder() START =======")

    if json_template is None:
        json_template = {
            "ip_address": "",
            "whois_date": "",
            "last_analysis_date": "",
            "reputation": "",
            "last_analysis_stats": "",
            "total_votes": "",
            "as_owner": "",
            "country": "",
            "asn": "",
            "image": "",
            "processed_date": "",
            "target_geo_country": "",  ## input from original excel
        }

    # Timestamp stored as processed_date on every record of this run.
    now = datetime.datetime.now(pytz.timezone("Singapore"))
    to_zone = tz.gettz('Singapore')

    # One single-row frame per report; concatenated once after the loop
    # instead of re-copying a growing frame on every iteration.
    row_frames = []

    for filename in os.listdir(folder_to_process):
        path = os.path.join(folder_to_process, filename)

        # Usual Folder: downloaded_vtresponse/<date>; anything that is not a
        # JSON file (e.g. an earlier combined CSV) is ignored.
        if not (os.path.isfile(path) and path.endswith(".json")):
            continue

        print("\n **** Processing:", path, "****")

        # The context manager closes the handle even if parsing fails.
        with open(path) as json_file:
            data = json.load(json_file)

        # Work on a copy so values from one report never leak into the next
        # one or into the caller's template.
        new_row = dict(json_template)

        print("json_template: ", json_template)
        print("Fresh new_row from json_template: ", new_row)

        # populate fields in JSON template
        for key in new_row:
            try:
                current_value = data['data']['attributes'][key]

                # replace epoch with legible date format for whois_date and last_analysis_date
                if key.endswith("date"):
                    # Passing tz makes the conversion itself timezone-aware;
                    # datetime.replace() returns a new object and does not
                    # modify the datetime it is called on.
                    current_value = datetime.datetime.fromtimestamp(current_value, tz=to_zone)

                new_row[key] = current_value

            except (KeyError, TypeError, ValueError, OverflowError, OSError) as e:
                # Missing field or unusable timestamp: apply the fallbacks.
                if key == "processed_date":
                    new_row[key] = now
                    print("new_row[key]:", now)

                elif key == "ip_address":
                    new_row[key] = data['data']['id']

                else:
                    print(key,"not found with exception:",e)

        print("new_row:", new_row)
        # NOTE(review): insert_one() adds an "_id" key to new_row, so that
        # column also ends up in the CSV - confirm this is wanted.
        db.ip.insert_one(new_row)

        row_frames.append(pd.json_normalize(new_row))

    if row_frames:
        combined_df = pd.concat(row_frames, ignore_index=True, sort=False)
    else:
        combined_df = pd.DataFrame()

    # NOTE(review): this file name uses DDMMYYYY while the download folders
    # use YYYYMMDD; kept as-is so existing output names do not change.
    dt_string = datetime.datetime.now(pytz.timezone("Singapore")).strftime("%d%m%Y")

    combined_df.to_csv(folder_to_process + '/' + dt_string + '_parsed-combined.csv')

    print("======= process_json_folder() END ======= \n\n")



# Step 1 - enrich a list of IPs with VirusTotal reports.
# Arguments: CSV name without the ".csv" extension, index of the column that
# holds the IP, and the look-back window in days used for the duplicate check.
# process_iplist("ip", 0,7)
# process_iplist("filtered_sg_ip_list_day1", 0,7)
# process_iplist("filtered_sg_ip_list_day2", 0,7)
process_iplist(
    filename_to_process="filtered_sg_ip_list",
    columnIndex=0,
    x_days_ago=7,
)



# Step 2 - process the downloaded VT JSONs (currently disabled)
# process_json_folder("downloaded_vtresponse_10Jan_combinedFull5k")
# process_json_folder("downloaded_vtresponse/28022023",json_template_ip)


# Report completion, then end the run with exit status 0.
print("completed")
exit(0)





Processing: #1 - 13.227.254.94 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2 - 13.227.254.99 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #3 - 23.200.24.43 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #4 - 17.248.164.108 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #5 - 13.33.100.61 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #6 - 52.76.228.161 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #7 - 173.194.22.169 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #8 - 18.142.201.37 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #9 - 192.166.246.143 Country

Processing: #127 - 203.116.175.128 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #128 - 23.50.92.154 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #129 - 74.125.68.154 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #130 - 18.140.39.47 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #131 - 23.36.252.78 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #132 - 150.109.90.59 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #133 - 172.224.26.138 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #134 - 13.33.33.41 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #135 - 84.17.3

Processing: #249 - 194.233.94.209 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #250 - 52.98.65.2 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #251 - 52.84.251.97 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #252 - 52.84.251.74 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #253 - 43.132.80.22 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #254 - 23.15.98.69 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #255 - 54.255.26.7 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #256 - 13.33.100.129 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #257 - 13.33.79.144 C

Processing: #359 - 202.165.107.53 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #360 - 13.251.171.38 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #361 - 157.240.15.34 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #362 - 23.58.140.25 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #363 - 23.54.58.69 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #364 - 3.1.172.253 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #365 - 52.98.71.210 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #366 - 43.132.81.184 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #367 - 13.229.19.

Processing: #427 - 23.45.116.67 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #428 - 13.33.88.62 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #429 - 157.240.15.37 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #430 - 17.248.164.113 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #431 - 17.253.61.218 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #432 - 13.228.107.3 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #433 - 23.59.80.121 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #434 - 17.253.61.219 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #435 - 18.141.24

Processing: #559 - 52.221.48.28 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #560 - 54.192.150.129 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #561 - 101.33.26.117 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #562 - 13.251.133.111 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #563 - 203.205.155.79 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #564 - 156.146.56.89 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #565 - 132.147.114.72 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #566 - 104.103.146.82 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #567 - 1

Processing: #635 - 143.244.33.174 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #636 - 54.254.189.209 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #637 - 13.33.33.78 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #638 - 23.36.49.120 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #639 - 157.240.13.128 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #640 - 13.33.33.114 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #641 - 13.33.88.79 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #642 - 111.223.64.82 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #643 - 40.90.18

Processing: #774 - 43.132.81.47 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #775 - 13.229.245.58 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #776 - 52.77.146.9 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #777 - 23.200.28.48 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #778 - 157.240.235.15 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #779 - 156.146.57.46 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #780 - 210.10.7.13 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #781 - 47.246.58.242 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #782 - 162.62.163

Processing: #861 - 52.77.91.85 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #862 - 17.253.118.201 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #863 - 125.252.230.58 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #864 - 150.109.91.37 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #865 - 20.43.150.84 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #866 - 159.138.84.22 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #867 - 203.116.175.17 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #868 - 23.205.208.43 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #869 - 23.20

Processing: #934 - 23.5.165.67 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #935 - 13.33.33.62 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #936 - 13.33.35.159 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #937 - 74.125.200.106 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #938 - 67.199.150.83 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #939 - 52.84.225.192 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #940 - 51.79.145.235 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #941 - 17.253.118.202 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #942 - 156.146.

Processing: #1011 - 13.33.28.10 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1012 - 13.33.33.75 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1013 - 23.205.208.182 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1014 - 116.0.81.228 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1015 - 101.33.26.225 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1016 - 13.229.186.118 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1017 - 103.216.223.204 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1018 - 172.217.194.106 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #10

file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1110 - 175.156.116.97 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1111 - 74.125.101.198 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1112 - 13.250.246.144 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1113 - 138.113.112.74 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1114 - 47.241.8.174 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1115 - 211.152.136.117 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1116 - 175.41.174.117 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1117 - 45.60.3.208 Country: SG
file has bee

Processing: #1206 - 128.106.25.167 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1207 - 129.226.2.70 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1208 - 40.99.33.162 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1209 - 13.250.173.68 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1210 - 129.226.192.145 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1211 - 165.225.113.192 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1212 - 43.156.223.119 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1213 - 165.21.100.88 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: 

Processing: #1287 - 156.146.56.83 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1288 - 46.137.226.162 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1289 - 101.33.26.231 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1290 - 157.240.235.34 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1291 - 74.125.171.7 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1292 - 52.220.96.228 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1293 - 52.84.251.105 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1294 - 13.33.33.16 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1295

Processing: #1376 - 52.221.66.87 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1377 - 17.253.61.203 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #1378 - 13.228.161.151 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1379 - 13.227.254.60 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1380 - 157.240.235.1 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1381 - 52.84.251.7 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1382 - 3.1.207.14 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1383 - 13.33.33.22 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1384 - 13.

Processing: #1455 - 43.156.222.216 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1456 - 172.217.194.132 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #1457 - 47.88.146.98 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1458 - 180.129.36.10 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1459 - 52.84.251.56 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1460 - 203.116.175.176 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1461 - 165.21.31.96 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1462 - 18.140.150.22 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1

file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1533 - 223.25.71.115 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1534 - 23.26.222.191 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1535 - 54.169.52.7 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1536 - 54.169.250.61 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1537 - 8.219.3.226 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1538 - 52.74.13.34 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1539 - 17.248.164.48 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1540 - 218.212.69.57 Country: SG
file has been processe

Processing: #1609 - 43.245.107.46 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1610 - 172.217.27.3 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1611 - 150.109.91.7 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1612 - 157.240.235.16 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1613 - 125.56.199.24 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1614 - 43.132.81.29 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1615 - 121.7.89.176 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #1616 - 17.248.164.51 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1617 -

Processing: #1679 - 172.217.194.105 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #1680 - 172.224.26.144 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1681 - 52.84.251.83 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1682 - 17.248.164.100 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1683 - 17.253.61.199 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1684 - 13.227.254.51 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1685 - 91.108.56.164 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1686 - 43.156.222.103 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: 

Processing: #1748 - 101.33.26.137 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1749 - 51.79.204.162 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1750 - 51.79.206.166 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1751 - 172.224.26.141 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1752 - 162.62.163.70 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #1753 - 104.65.229.80 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1754 - 13.33.88.97 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1755 - 18.142.189.37 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1756

Processing: #1822 - 116.0.81.230 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1823 - 71.18.1.242 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1824 - 101.33.26.228 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1825 - 13.228.225.157 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1826 - 111.223.64.56 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1827 - 157.240.15.1 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #1828 - 203.116.175.73 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1829 - 54.255.122.90 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1830 

Processing: #1896 - 13.33.33.57 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1897 - 54.192.150.28 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1898 - 101.33.26.223 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1899 - 52.77.165.26 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1900 - 103.231.98.209 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1901 - 157.240.13.52 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1902 - 47.246.58.232 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1903 - 20.247.252.71 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #1904 

Processing: #1964 - 150.109.90.123 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #1965 - 157.240.235.37 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #1966 - 52.220.63.104 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #1967 - 43.132.73.34 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #1968 - 54.169.132.54 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #1969 - 89.187.163.84 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #1970 - 139.99.123.22 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #1971 - 149.154.171.5 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #19

Processing: #2055 - 52.220.183.30 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2056 - 52.220.211.31 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2057 - 164.52.91.109 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2058 - 104.65.228.26 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2059 - 23.36.252.57 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2060 - 47.246.58.232 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #2061 - 220.255.163.216 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2062 - 52.84.251.113 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #20

Processing: #2149 - 52.221.24.84 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2150 - 203.116.175.9 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2151 - 54.254.217.107 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2152 - 17.248.164.101 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2153 - 13.228.241.6 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2154 - 124.155.222.141 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2155 - 8.219.193.159 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2156 - 89.187.162.136 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #

file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2243 - 43.132.80.53 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2244 - 52.74.13.196 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2245 - 104.69.39.62 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2246 - 23.49.60.151 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2247 - 40.100.55.2 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2248 - 74.125.68.157 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2249 - 18.139.196.132 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2250 - 52.84.251.126 Country: SG
file has been process

Processing: #2323 - 47.246.58.93 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2324 - 52.114.15.135 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2325 - 203.116.175.161 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2326 - 17.248.164.16 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2327 - 18.141.157.172 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2328 - 18.140.204.7 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2329 - 106.10.219.26 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2330 - 23.52.171.114 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #23

Processing: #2415 - 161.117.125.216 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2416 - 18.143.109.6 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2417 - 13.251.8.60 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2418 - 74.125.164.137 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2419 - 20.44.220.42 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2420 - 43.132.81.47 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2421 - 13.33.33.39 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API call
Processing: #2422 - 13.228.107.3 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2423 - 7

Processing: #2493 - 157.240.7.53 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2494 - 74.125.130.147 Country: SG
Processing: #2495 - 13.33.33.21 Country: SG
Processing: #2496 - 23.46.16.128 Country: SG
Processing: #2497 - 23.98.104.193 Country: SG
Processing: #2498 - 74.125.68.156 Country: SG
Processing: #2499 - 52.77.152.198 Country: SG
Processing: #2500 - 13.33.88.118 Country: SG
Processing: #2501 - 91.245.253.228 Country: SG
Processing: #2502 - 40.99.9.82 Country: SG
Processing: #2503 - 157.240.7.20 Country: SG
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2504 - 156.59.238.0 Country: SG
Processing: #2505 - 51.79.204.51 Country: SG
Processing: #2506 - 114.119.168.162 Country: SG
Processing: #2507 - 203.117.35.15 Country: SG
Processing: #2508 - 54.192.150.26 Country: SG
Processing: #2509 - 17.253.61.214 Country: SG
file has been processed on 20230306 which is <7 days ago, will skip API

Processing: #2645 - 13.33.88.121 Country: SG
Processing: #2646 - 157.240.7.32 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #2647 - 203.116.175.10 Country: SG
Processing: #2648 - 18.141.109.184 Country: SG
Processing: #2649 - 18.136.163.115 Country: SG
Processing: #2650 - 13.35.22.210 Country: SG
Processing: #2651 - 118.215.86.168 Country: SG
Processing: #2652 - 111.65.100.17 Country: SG
Processing: #2653 - 23.205.208.49 Country: SG
Processing: #2654 - 13.214.111.70 Country: SG
Processing: #2655 - 52.221.11.60 Country: SG
Processing: #2656 - 94.74.88.100 Country: SG
file has been processed on 20230302 which is <7 days ago, will skip API call
Processing: #2657 - 23.210.250.161 Country: SG
Processing: #2658 - 103.28.54.149 Country: SG
Processing: #2659 - 129.226.3.47 Country: SG
Processing: #2660 - 23.200.24.97 Country: SG
Processing: #2661 - 52.221.118.118 Country: SG
Processing: #2662 - 117.121.250.0 Country: SG
Processing: #2663 -

Processing: #2781 - 103.229.10.180 Country: SG
Processing: #2782 - 72.247.81.163 Country: SG
Processing: #2783 - 188.42.147.33 Country: SG
Processing: #2784 - 17.248.164.36 Country: SG
Processing: #2785 - 13.33.88.81 Country: SG
Processing: #2786 - 54.169.85.42 Country: SG
Processing: #2787 - 143.92.75.65 Country: SG
Processing: #2788 - 42.60.143.143 Country: SG
file has been processed on 20230301 which is <7 days ago, will skip API call
Processing: #2789 - 111.65.100.1 Country: SG
Processing: #2790 - 13.67.15.132 Country: SG
Processing: #2791 - 74.125.200.132 Country: SG
Processing: #2792 - 13.33.88.77 Country: SG
Processing: #2793 - 13.33.88.127 Country: SG
Processing: #2794 - 13.33.88.76 Country: SG
Processing: #2795 - 119.28.121.173 Country: SG
Processing: #2796 - 103.115.76.37 Country: SG
Processing: #2797 - 203.117.34.145 Country: SG
Processing: #2798 - 13.33.33.3 Country: SG
Processing: #2799 - 71.18.1.240 Country: SG
Processing: #2800 - 23.220.203.58 Country: SG
Processing: #28

In [9]:
# Blank record covering every field collected for either an IP or a domain
# lookup. All values start as empty strings and are filled in later.
json_template_general = dict.fromkeys(
    (
        "DNS",
        "Whois",
        "whois_date",              # epoch value; convert to a readable date
        "last_analysis_date",
        "creation_date",
        "reputation",
        "registrar",
        "last_analysis_stats",     # open question: split into 5 columns?
        "last_https_certificate",
        "categories",
        "total_votes",
        "as_owner",
        "country",
        "asn",
        "download_archived_page",
        "image",
        "processed_date",          # own field, used for the X+7 days check
    ),
    "",
)

In [2]:
# Field names of one IP-address record, in output order.
_ip_fields = (
    "ip_address",
    "whois_date",
    "last_analysis_date",
    "reputation",
    "last_analysis_stats",
    "total_votes",
    "as_owner",
    "country",
    "asn",
    "image",
    "processed_date",
    "target_geo_country",  # input from original excel
)

# Blank IP record: every field present, every value an empty string.
json_template_ip = {field: "" for field in _ip_fields}

In [15]:
# Blank domain record: the listed fields in output order, each initialised to
# an empty string.
json_template_domain = dict.fromkeys(
    [
        "DNS",
        "Whois",
        "whois_date",
        "last_analysis_date",
        "creation_date",
        "reputation",
        "registrar",
        "last_analysis_stats",
        "last_https_certificate",
        "categories",
        "total_votes",
        "download_archived_page",
        "image",
        "processed_date",
    ],
    "",
)

In [19]:
# Scratch experiment: overwrite every value of the IP template with 1, keeping
# the same dict object and the same key order.
json_template_ip.update(dict.fromkeys(json_template_ip, 1))

# Bare expression so the notebook echoes the mutated template.
json_template_ip

{'whois_date': 1,
 'last_analysis_date': 1,
 'reputation': 1,
 'last_analysis_stats': 1,
 'total_votes': 1,
 'as_owner': 1,
 'country': 1,
 'asn': 1,
 'image': 1}

In [36]:
class ip_template:
    """Small record object used to try out object -> JSON conversion.

    NOTE(review): the constructor fields (roll_no, name, batch) look like
    placeholders; the attributes intended for an IP record are listed in the
    comment below and are not implemented yet.
    """

    # Intended IP-record attributes (all strings, not yet wired in):
    #     ip_address, whois_date, last_analysis_date, reputation,
    #     last_analysis_stats, total_votes, as_owner, country, asn, image,
    #     processed_date, target_geo_country

    def __init__(self, roll_no, name, batch):
        """Store the three constructor arguments as instance attributes."""
        self.roll_no = roll_no
        self.name = name
        self.batch = batch


# Round-trip an empty instance through JSON: serialise its attribute dict to a
# string, then parse that string back into a plain dict.
new_row = ip_template('', '', '')
new_row_str = json.dumps(new_row.__dict__)
# Parse the serialised string. The original passed `new_row_json` here, which
# is not defined yet at this point and raised NameError.
new_row_json = json.loads(new_row_str)

# Echo the parsed dict as the cell result (replaces a stray undefined name).
new_row_json

{'roll_no': '', 'name': '', 'batch': ''}

In [None]:
# Blank IP-address record: each field below maps to an empty string until a
# response has been parsed into it.
json_template_ip = dict.fromkeys(
    (
        "ip_address",
        "whois_date",
        "last_analysis_date",
        "reputation",
        "last_analysis_stats",
        "total_votes",
        "as_owner",
        "country",
        "asn",
        "image",
        "processed_date",
        "target_geo_country",  # input from original excel
    ),
    "",
)

In [1]:
import base64
import hashlib
import json
import requests
import time
import csv
import datetime
import os
import pandas as pd
from dateutil import tz
import pytz
from pymongo import MongoClient


def process_json_folder(folder_to_process, json_template):
    """Flatten every ``*.json`` response in a folder into one dated CSV.

    Each JSON file directly inside ``folder_to_process`` (sub-folders are not
    visited) is loaded and turned into one row. For every key of the template
    the value is read from ``data['data']['attributes'][key]``; keys ending in
    ``"date"`` are converted from an epoch timestamp to a timezone-aware
    Singapore datetime. When a key cannot be read:

    * ``processed_date`` is set to the time this function started,
    * ``ip_address`` is taken from ``data['data']['id']``,
    * any other key keeps its template value and a message is printed.

    The rows are written to ``<DDMMYYYY>_parsed-combined.csv`` inside
    ``folder_to_process``. An empty CSV is written if no JSON file is found.

    Args:
        folder_to_process: Path of the folder holding the JSON responses.
        json_template: Mapping whose keys name the fields to extract and whose
            values are the defaults for missing fields. It is copied for each
            file and never mutated. If it is empty or ``None``, a built-in IP
            template is used instead.

    Returns:
        None. The result is the CSV file written to disk.
    """
    print("======= process_json_folder() START =======")

    # One timezone object and one timestamp for the whole run, so every row and
    # the output file name agree even if the run crosses midnight.
    sg_zone = pytz.timezone("Singapore")
    now = datetime.datetime.now(sg_zone)
    dt_string = now.strftime("%d%m%Y")

    # Fall back to the IP layout that used to be hard-coded here, so callers
    # that passed a placeholder template keep working.
    if not json_template:
        json_template = {
            "ip_address": "",
            "whois_date": "",
            "last_analysis_date": "",
            "reputation": "",
            "last_analysis_stats": "",
            "total_votes": "",
            "as_owner": "",
            "country": "",
            "asn": "",
            "processed_date": "",
            "target_geo_country": "",  # input from original excel
        }

    # Collect one single-row frame per file and concatenate once at the end;
    # concatenating inside the loop re-copies all earlier rows every time.
    frames = []

    # Sorted so the row order of the CSV is reproducible between runs.
    for filename in sorted(os.listdir(folder_to_process)):
        path = os.path.join(folder_to_process, filename)

        # Only regular files with a .json suffix are responses; this also skips
        # the combined CSV written by earlier runs.
        if not (os.path.isfile(path) and filename.endswith(".json")):
            continue

        print("\n **** Processing:", path, "****")

        # Context manager guarantees the handle is closed even if parsing fails.
        with open(path, encoding="utf-8") as handle:
            data = json.load(handle)

        # Fresh copy per file: values from one file must not leak into the
        # next, and the caller's template must stay untouched.
        new_row = dict(json_template)

        print("json_template: ", json_template)
        print("Fresh new_row from json_template: ", new_row)

        # Populate the row; only existing keys are reassigned, so iterating
        # the dict while writing to it is safe.
        for key in new_row:
            try:
                current_value = data['data']['attributes'][key]

                # Replace epoch seconds with a readable, timezone-aware date.
                # Passing tz= performs the conversion; calling .replace() and
                # discarding its result, as before, changed nothing.
                if key.endswith("date"):
                    current_value = datetime.datetime.fromtimestamp(
                        current_value, tz=sg_zone
                    )

                new_row[key] = current_value

            except (LookupError, TypeError, ValueError, OverflowError, OSError) as e:
                # Missing attribute or unusable timestamp: best-effort fallback.
                if key == "processed_date":
                    new_row[key] = now
                    print("new_row[key]:", now)

                elif key == "ip_address":
                    new_row[key] = data['data']['id']

                else:
                    print(key, "not found with exception:", e)

        print("new_row:", new_row)
        # TODO: optional MongoDB persistence (db.ip.insert_one(new_row)) is
        # still disabled.

        frames.append(pd.json_normalize(new_row))

    # pd.concat raises on an empty list, so keep the empty-frame behaviour for
    # folders without any JSON file.
    if frames:
        combined_df = pd.concat(frames, ignore_index=True, sort=False)
    else:
        combined_df = pd.DataFrame()

    combined_df.to_csv(
        os.path.join(folder_to_process, dt_string + '_parsed-combined.csv')
    )

    print("======= process_json_folder() END ======= \n\n")



# Build the combined CSV for the merged response folder using the IP template.
process_json_folder(
    folder_to_process="downloaded_vtresponse/combined",
    json_template=json_template_ip,
)



In [14]:
# Scratch cell: echo the `status_code` attribute of the object `r`.
# NOTE(review): `r` is not defined in this cell — presumably the `requests`
# response left over from an earlier API call; confirm before re-running.
# r.content

r.status_code
# Disabled debugging aid: dump the raw response body as a string.
# site_response = str(r.content)
# print(site_response)

200

In [3]:
# Number of days used in the duplicate-skip message below.
x_days_ago = 7

# Bare expression: echo the one-element list holding the skip message.
[f"Not Processed due to DUPLICATE in past {x_days_ago} days"]

['Not Processed due to DUPLICATE in past 7 days']