### Changelog 

- Separating **process_iplist()** output from VT calls to folders (todays_date)
- changed output of **process_json()** FROM "_parsed-combined" to todays_date_parsed-combined in EACH RESPECTIVE FOLDER 

### Flow

1. User edits and feeds ip.csv
2. **process_iplist()** reads each and calls VT api 
3. Responses are stored in **"downloaded_vtresponse"** and seperated into folders by **respective dates** (DDMMYYYY)
4. **process_json()** reads jsons in each folder and generates a compilation for that day in each folder

### Questions / Todo List

**Update** 

I have managed to get the scripts running from jupyter notebook but have the following questions

(a) Receiving (via a web interface) either an individual or list of domais / IP addresses --> currently it's fed via CSV, should i create a front-end for people to upload their files?

(b) Storing list of domains / IP address into a queue based list --> Is this the back-end of things? That is to say, this script runs in the back-end and whenever files come in from front-end it'll trigger the script?

(c) Carryout enrichment --> Where does this "processed in the previous X days" come from? From what i understand, should i create a check such that when new information comes in, it will look at previous histories when the IP/Domain was checked, and continue / stop accordingly? 

(d) storing responses in disk and extracting subset into DB --> is there a specific subset you'd like? DB-wise I would prefer to try NoSQL as i have no experience with it!

In [4]:
import base64
import hashlib
import json
import requests
import time
import csv
import datetime
import os
import pandas as pd
from dateutil import tz
import pytz
from pymongo import MongoClient


json_template_ip = {
    
    "ip_address": "",
    "whois_date": "",
    "last_analysis_date": "",
    "reputation": "",
    "last_analysis_stats": "",
    "total_votes": "",
    "as_owner": "",
    "country": "",
    "asn": "",
    "image":"",
    "processed_date":"",
    "target_geo_country":""  ## input from original excel
       
}

API_KEY = '0d9fdb6e32d74b9d12e3d894309531838c3aabe8d66b049fd3a7976fbedf2c68'  #@param  {type: "string"}
# API_KEY = '207349263f9c5edd176cc079fa8000a5ab912df7d9e91154842c08031658675d'  #@param  {type: "string"}



client = MongoClient('localhost',27017)
# db = client['d_ip_enrich']
db = client['filtered_sg_ip_list_day1']
    


def process_iplist(filename_to_process, columnIndex, x_days_ago):
    
    print("======= process_iplist() START =======")
    
    # TODO: Make generalised and incorporate timestamp in foldername
    now = datetime.datetime.now()
#     dt_string = now.strftime("%d%m%Y")
    dt_string = now.strftime("%Y%m%d")
    d = datetime.timedelta(days = x_days_ago)
#     deducted_date = (now - d).strftime("%d%m%Y")
    deducted_date = (now - d).strftime("%Y%m%d")
    
    with open(filename_to_process + ".csv", newline='') as inputfile:

        if not os.path.exists("downloaded_vtresponse"):
                os.makedirs("downloaded_vtresponse")

        with open(filename_to_process + "_tracker_" + dt_string + ".csv", 'w', newline='') as outputfile:
            
            ip_list = csv.reader(inputfile, delimiter=',')
            output_writer = csv.writer(outputfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)

            counter = 0

            for row in ip_list:
                
                if counter == 0:
                    output_writer.writerow(row + ["Time Run"] + ["Processed?"])
                    outputfile.flush()
                    counter += 1
                    continue

                print("Processing: #" + str(counter) + " - " + row[columnIndex] + " Country: " + row[1])
                
                # Check if IP was proceeded x_days_ago, if yes, will skip calling
                file_skip = to_skip(row[0], "downloaded_vtresponse", x_days_ago)
                
                 # make dir to store API Responses
                if not os.path.exists("downloaded_vtresponse/" + dt_string):
                    os.makedirs("downloaded_vtresponse/" + dt_string)

                #Get an IP address Report
                if file_skip == 0:
                    r = requests.get("https://www.virustotal.com/api/v3/ip_addresses/"+row[0], headers={"x-apikey":API_KEY})
                    
                    ## Check status 200 = ok, 204 = exceeded, 400 = bad request, 403 = forbidden
                    # if status != 200, will break all processing
                    if (r.status_code != 200):
                        print("Status Code: ",r.status_code, "please take a look" )
                        output_writer.writerow(row + [datetime.datetime.now()] + ["Not Processed due to status_code: " + str(r.status_code)]) 
                        outputfile.flush()
                        break

                    r = r.json()
                    # inputting target_geo_country from excel into JSON
                    r['data']['attributes']['target_geo_country'] = row[1]
                    
                    with open("downloaded_vtresponse/" + dt_string + "/" + row[columnIndex] + ".json", "w") as outfile:

#                         outfile.write(r.text)
                        json_obj = json.dumps(r)
                        outfile.write(json_obj)

                    output_writer.writerow(row + [datetime.datetime.now()] + ["Processed"]) 

                    outputfile.flush()
                    time.sleep(16)

                
                else:
                    
                    output_writer.writerow(row + [datetime.datetime.now()] + ["Not Processed due to DUPLICATE in past "+ str(x_days_ago) + " days"] ) 
                    outputfile.flush()
                    
                
                counter += 1
                
    print("======= process_iplist() END ======= \n\n")
#                 time.sleep(16)


# check if file exist in folder_to_process during x_days_ago, returns 0 or 1
def to_skip(filename, folder_to_process, x_days_ago):
    
    now = datetime.datetime.now()
#     dt_string = now.strftime("%d%m%Y")
    dt_string = now.strftime("%Y%m%d")

    d = datetime.timedelta(days = x_days_ago)
#     deducted_date = (now - d).strftime("%d%m%Y")
    deducted_date = (now - d).strftime("%Y%m%d")
    to_skip = 0

    folders = os.listdir("downloaded_vtresponse")
    folders = os.listdir(folder_to_process)
    
    print("deducted_date:", deducted_date)
    

    for folder in folders:

        if to_skip == 1:
            break

        # target folders within X days range
        
        print("current folder:", folder)
        print("folder >= deducted_date", folder>=deducted_date)
        if folder >= deducted_date:
            files_array = os.listdir("downloaded_vtresponse/" + folder)
#                         print(files_array)

            for file in files_array:
                filename_filetype = file.rsplit('.',1)
    #             print(filename_filedate)

                ## if filename == target THEN SKIP + WRITE A NOTE
                if filename == filename_filetype[0]:
                    print(f"file has been processed on {folder} which is <{x_days_ago} days ago, will skip API call")
                    to_skip = 1
                    break
        
    
    return to_skip


    #     print(os.listdir("downloaded_vtresponse/"+ folder))    

def process_json_folder(folder_to_process,json_template):
    
    print("======= process_json_folder() START =======")
    
    # Get Date + Time to input later
    now = datetime.datetime.now(pytz.timezone("Singapore"))
    dt_string = now.strftime("%d%m%Y")

    # Usual Folder: downloaded_vtresponse
    combined_df = pd.DataFrame()
    
    
    for filename in os.listdir(folder_to_process):
        f = os.path.join(folder_to_process, filename)
    
#         print("f:", f)
        # check if it is a file
        if os.path.isfile(f) and f[-5:]==".json":

            print("\n **** Processing:",f, "****")
            # Opening JSON file
            f = open(f)
#             print(f)

            # returns JSON object as
            # a dictionary
            data = json.load(f)
#             print(data)

            # load JSON template
            json_template = {
                            "ip_address": "",
                            "whois_date": "",
                            "last_analysis_date": "",
                            "reputation": "",
                            "last_analysis_stats": "",
                            "total_votes": "",
                            "as_owner": "",
                            "country": "",
                            "asn": "",
                            "image":"",
                            "processed_date":"",
                            "target_geo_country":""  ## input from original excel
                            }
        
            new_row = json_template
#             new_row = json_template()

            print("json_template: ", json_template)
            print("Fresh new_row from json_template: ", new_row)

            # populate fields in JSON template
            for key in new_row:    
#                 print("current key", key)

                try:
                    current_value = data['data']['attributes'][key]
                    
                    # replace epoch with legible date format for whois_date and last_analysis_date
                    if key[-4:] == "date":
                        to_zone = tz.gettz('Singapore')
                        date_time = datetime.datetime.fromtimestamp( current_value )  
#                         current_value = date_time.replace(tzinfo=to_zone)
                        date_time.replace(tzinfo=to_zone)
                        current_value = date_time
                        
            
                    new_row[key] = current_value

                except Exception as e: 
               
                    if key == "processed_date":
                        new_row[key] = now
                        print("new_row[key]:", now)
                    
                    elif key == "ip_address":
                        new_row[key] = data['data']['id']
                    
                    else:
                        print(key,"not found with exception:",e)

            print("new_row:", new_row)
            db.ip.insert_one(new_row)

            df_result = pd.json_normalize(new_row)
            
            combined_df = pd.concat([combined_df, df_result], ignore_index=True, sort=False)
    
    
#     print(combined_df)
    now = datetime.datetime.now(pytz.timezone("Singapore"))
    dt_string = now.strftime("%d%m%Y")
    
    combined_df.to_csv(folder_to_process + '/' + dt_string + '_parsed-combined.csv')
    
    print("======= process_json_folder() END ======= \n\n")



# Process the list of IPs (CSVs ok but IP must be x column in the list)
# (filename.csv, column, x_days_ago)
# process_iplist("ip", 0,7)
# process_iplist("filtered_sg_ip_list_day1", 0,7)
# process_iplist("filtered_sg_ip_list_day2", 0,7)
process_iplist("filtered_sg_ip_list", 0,7)



# Process the downloaded VT JSONs
# process_json_folder("downloaded_vtresponse_10Jan_combinedFull5k")
# process_json_folder("downloaded_vtresponse/28022023",json_template_ip)


print("completed")
exit(0)





Processing: #1 - 13.227.254.94 Country: SG
deducted_date: 20230223
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #2 - 13.227.254.99 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #3 - 23.200.24.43 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #4 - 17.248.164.108 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current f

Processing: #36 - 188.42.147.32 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #37 - 3.1.123.42 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #38 - 23.59.80.33 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #39 - 51.79.222.34 Country: SG
deducted_date: 20230223
current fol

file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #72 - 104.83.197.140 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #73 - 23.73.13.201 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #74 - 167.71.195.165 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API 

Processing: #106 - 13.35.18.228 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #107 - 117.121.250.156 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #108 - 40.99.10.66 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #109 - 54.192.151.123 Country: SG
deducted_date: 20230223
c

file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #145 - 54.192.150.77 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #146 - 161.117.96.24 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #147 - 18.138.223.207 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip A

Processing: #183 - 17.253.61.216 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #184 - 157.240.13.48 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #185 - 139.99.45.37 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #186 - 23.52.171.137 Country: SG
deducted_date: 20230223
cu

file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #215 - 54.192.150.13 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #216 - 13.33.100.132 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #217 - 91.108.56.173 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip AP

Processing: #250 - 52.98.65.2 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #251 - 52.84.251.97 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #252 - 52.84.251.74 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #253 - 43.132.80.22 Country: SG
deducted_date: 20230223
current

file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #286 - 124.155.222.58 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #287 - 103.252.202.183 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #288 - 184.31.5.103 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip 

file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #323 - 107.155.23.10 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #324 - 124.155.222.40 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #325 - 103.28.54.162 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip A

deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #360 - 13.251.171.38 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #361 - 157.240.15.34 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #362 - 23.58.140.25 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_dat

Processing: #397 - 52.220.83.205 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #398 - 142.251.91.102 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #399 - 17.253.61.198 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #400 - 8.212.12.137 Country: SG
deducted_date: 20230223
c

folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #433 - 23.59.80.121 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #434 - 17.253.61.219 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #435 - 18.141.245.52 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deduct

file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #467 - 13.33.88.49 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #468 - 49.245.62.228 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #469 - 103.167.26.35 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API 

Processing: #496 - 142.251.91.103 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #497 - 101.32.104.104 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #498 - 13.227.254.98 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #499 - 51.79.164.252 Country: SG
deducted_date: 20230223

Processing: #546 - 51.79.147.148 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #547 - 13.33.100.113 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #548 - 118.215.87.103 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #549 - 3.0.71.181 Country: SG
deducted_date: 20230223
cur

file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #582 - 121.6.48.26 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #583 - 103.229.205.242 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip API call
Processing: #584 - 52.76.244.101 Country: SG
deducted_date: 20230223
current folder: 02032023
folder >= deducted_date False
current folder: 20230222
folder >= deducted_date False
current folder: 20230228
folder >= deducted_date True
file has been processed on 20230228 which is <7 days ago, will skip AP

KeyboardInterrupt: 

In [9]:
json_template_general = {
    
    "DNS": "", 
    "Whois": "",
    "whois_date": "", ## CONVERT FROM EPOCH TO USER FRIENDLY DATE
    "last_analysis_date": "",
    "creation_date": "",
    "reputation": "",
    "registrar": "",
    "last_analysis_stats": "",  ## SEPERATE INTO 5 COLUMNS?
    "last_https_certificate": "",
    "categories": "",
    "total_votes": "",
    "as_owner": "",
    "country": "",
    "asn": "",
    "download_archived_page":"",
    "image":"",
    "processed_date":""     ## OWN FIELD TO CHECK X+7 DAYS
    
    
}

In [2]:
json_template_ip = {
    
    "ip_address": "",
    "whois_date": "",
    "last_analysis_date": "",
    "reputation": "",
    "last_analysis_stats": "",
    "total_votes": "",
    "as_owner": "",
    "country": "",
    "asn": "",
    "image":"",
    "processed_date":"",
    "target_geo_country":""  ## input from original excel
       
}

In [15]:
json_template_domain = {
    
    "DNS": "", 
    "Whois": "",
    "whois_date": "",
    "last_analysis_date": "",
    "creation_date": "",
    "reputation": "",
    "registrar": "",
    "last_analysis_stats": "",
    "last_https_certificate": "",
    "categories": "",
    "total_votes": "",
    "download_archived_page":"",
    "image":"",
    "processed_date":""
    
    
}

In [19]:
for key in json_template_ip:
    json_template_ip[key] = 1 
    
json_template_ip

{'whois_date': 1,
 'last_analysis_date': 1,
 'reputation': 1,
 'last_analysis_stats': 1,
 'total_votes': 1,
 'as_owner': 1,
 'country': 1,
 'asn': 1,
 'image': 1}

In [36]:
class ip_template:

    # class attribute
#     ip_address = ""
#     whois_date = ""
#     last_analysis_date = ""
#     reputation= ""
#     last_analysis_stats=""
#     total_votes= ""
#     as_owner=""
#     country= ""
#     asn=""
#     image=""
#     processed_date=""
#     target_geo_country="" 
    def __init__(self, roll_no, name, batch):
        self.roll_no = roll_no
        self.name = name
        self.batch = batch
    
new_row = ip_template('','','')
# new_row['sg']
# new_row.country="sg"
new_row_str =json.dumps(new_row.__dict__)
new_row_json = json.loads(new_row_json)
a
# new_row_json
# type(new_row_json)
# # create parrot1 object
# parrot1 = Parrot()
# parrot1.name = "Blu"
# parrot1.age = 10

# # create another object parrot2
# parrot2 = Parrot()
# parrot2.name = "Woo"
# parrot2.age = 15

# # access attributes
# print(f"{parrot1.name} is {parrot1.age} years old")
# print(f"{parrot2.name} is {parrot2.age} years old")

{'roll_no': '', 'name': '', 'batch': ''}

In [None]:
json_template_ip = {
    
    "ip_address": "",
    "whois_date": "",
    "last_analysis_date": "",
    "reputation": "",
    "last_analysis_stats": "",
    "total_votes": "",
    "as_owner": "",
    "country": "",
    "asn": "",
    "image":"",
    "processed_date":"",
    "target_geo_country":""  ## input from original excel
       
}

In [1]:
import base64
import hashlib
import json
import requests
import time
import csv
import datetime
import os
import pandas as pd
from dateutil import tz
import pytz
from pymongo import MongoClient

API_KEY = '207349263f9c5edd176cc079fa8000a5ab912df7d9e91154842c08031658675d'  #@param  {type: "string"}
r = requests.get("https://www.virustotal.com/api/v3/ip_addresses/1.1.1.1", headers={"x-apikey":API_KEY}) 



In [14]:
# r.content

r.status_code
# site_response = str(r.content)
# print(site_response)

200

In [3]:
x_days_ago = 7

["Not Processed due to DUPLICATE in past "+ str(x_days_ago) + " days"]

['Not Processed due to DUPLICATE in past 7 days']