### Imports and Constants

In [1]:
from censys.search import CensysHosts
import re
import sys
import pandas as pd
from collections.abc import MutableMapping
# the pandas set_option function allows you to customize displayed results, among many other things https://pandas.pydata.org/docs/user_guide/options.html
pd.set_option('display.max_columns', 100)

In [2]:
# dict flattening magic necessary for cleaning up the censys API response

def _flatten_dict_gen(d, parent_key, sep):
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            yield from flatten_dict(v, new_key, sep=sep).items()
        else:
            yield new_key, v


def flatten_dict(d: MutableMapping, parent_key: str = '', sep: str = '.'):
    return dict(_flatten_dict_gen(d, parent_key, sep))


#### %%bash magic
See https://ipython.readthedocs.io/en/stable/interactive/magics.html#cell-magics for more on cell magics.
This download step is not required: you could instead supply your own `output.txt` containing one IP address per line, taken from any other source you find interesting (even a Censys query). The example list used here contains Cobalt Strike IPs.

In [4]:
%%bash
# Download the example IP list into output.txt.
# --fail: exit non-zero on an HTTP error instead of saving the server's error page as if it were the IP list
# --location: follow redirects, if the server issues any
curl --fail --location --output output.txt https://pastebin.com/raw/7UgBYuWe

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  4148    0  4148    0     0   7251      0 --:--:-- --:--:-- --:--:--  7239


### Open TXT file, query Censys API for each IP address, format results into DataFrame

In [5]:
# Read the candidate IP addresses, one per line.
with open('output.txt', encoding='utf-8') as foo:
    f = foo.readlines()

h = CensysHosts()

# One frame per host. They are combined with a single concat after the loop
# rather than growing a DataFrame inside it (repeated concat copies every
# previously collected row on each iteration).
host_frames = []

for line in f:
    ip = line.strip()
    if not ip:
        # blank line (e.g. a trailing newline in the file): nothing to look up
        continue
    d = h.view(ip)
    d = pd.json_normalize(d)
    # here's the dict flattening as promised
    # NOTE(review): `d` is already a DataFrame at this point, so flatten_dict only
    # turns it into a {column: Series} mapping that from_dict rebuilds below;
    # the nested keys appear to have been flattened by json_normalize already.
    d = flatten_dict(d)
    df = pd.DataFrame.from_dict(d)
    df = df.explode('services') # I hope you didn't need anything other than the 'services' response. I don't.
    df = df.reset_index()
    # expand each service entry into its own columns and put them next to the host-level columns
    serv = df['services'].tolist()
    res = pd.concat([df.drop('services', axis=1), pd.json_normalize(serv)], axis=1)
    host_frames.append(res)

# The most recently fetched host goes first, matching the order produced by
# prepending each result. Row labels restart at 0 for every host.
censys_output = pd.concat(host_frames[::-1], axis=0) if host_frames else pd.DataFrame()

In [6]:
# Show the combined frame: it has one row per port per IP address.
censys_output

Unnamed: 0,index,ip,location_updated_at,autonomous_system_updated_at,last_updated_at,location.continent,location.country,location.country_code,location.city,location.postal_code,location.timezone,location.province,location.coordinates.latitude,location.coordinates.longitude,location.registered_country,location.registered_country_code,autonomous_system.asn,autonomous_system.description,autonomous_system.bgp_prefix,autonomous_system.name,autonomous_system.country_code,operating_system.uniform_resource_identifier,operating_system.part,operating_system.product,operating_system.source,_decoded,extended_service_name,observed_at,perspective_id,port,service_name,source_ip,transport_protocol,truncated,dns.server_type,dns.answers,dns.questions,dns.edns.do,dns.edns.udp,dns.edns.version,dns.r_code,dns.resolves_correctly,banner,banner_hex,software,_encoding.banner,_encoding.banner_hex,http.request.method,http.request.uri,http.request.headers.User_Agent,...,dns.records.lovtiktok.com.record_type,dns.records.lovtiktok.com.resolved_at,dns.records.monadore.top.record_type,dns.records.monadore.top.resolved_at,dns.records.tropic.kim.record_type,dns.records.tropic.kim.resolved_at,http.response.headers.Access_Control_Expose_Headers,http.response.headers._encoding.Access_Control_Expose_Headers,http.response.headers._encoding.X_Cloud_Trace_Context,http.response.headers._encoding.Access_Control_Allow_Methods,http.response.headers._encoding.Access_Control_Allow_Headers,http.response.headers.X_Cloud_Trace_Context,http.response.headers.Access_Control_Allow_Methods,http.response.headers.Access_Control_Allow_Headers,dns.records.18.102.85.34.bc.googleusercontent.com.record_type,dns.records.18.102.85.34.bc.googleusercontent.com.resolved_at,dns.records.ec2-18-167-12-189.ap-east-1.compute.amazonaws.com.record_type,dns.records.ec2-18-167-12-189.ap-east-1.compute.amazonaws.com.resolved_at,dns.records.ec2-3-93-190-179.compute-1.amazonaws.com.record_type,dns.records.ec2-3-93-190-179.compute-1.amazonaws.
com.resolved_at,dns.records.dts.x2yz.cn.record_type,dns.records.dts.x2yz.cn.resolved_at,dns.records.53.177.194.35.bc.googleusercontent.com.record_type,dns.records.53.177.194.35.bc.googleusercontent.com.resolved_at,dns.records.ec2-13-40-124-245.eu-west-2.compute.amazonaws.com.record_type,dns.records.ec2-13-40-124-245.eu-west-2.compute.amazonaws.com.resolved_at,dns.records.us.1pixel.xyz.record_type,dns.records.us.1pixel.xyz.resolved_at,dns.records.fucku.wdnmd.info.record_type,dns.records.fucku.wdnmd.info.resolved_at,dns.records.will.shadow.international.record_type,dns.records.will.shadow.international.resolved_at,dns.records.ec2-54-163-158-61.compute-1.amazonaws.com.record_type,dns.records.ec2-54-163-158-61.compute-1.amazonaws.com.resolved_at,dns.records.remote.nedsecures.co.za.record_type,dns.records.remote.nedsecures.co.za.resolved_at,dns.records.149-28-42-139.ipv4.nknlabs.io.record_type,dns.records.149-28-42-139.ipv4.nknlabs.io.resolved_at,dns.records.microstarltd.space.record_type,dns.records.microstarltd.space.resolved_at,dns.records.ec2-13-208-175-172.ap-northeast-3.compute.amazonaws.com.record_type,dns.records.ec2-13-208-175-172.ap-northeast-3.compute.amazonaws.com.resolved_at,dns.records.c2.pixus.cc.record_type,dns.records.c2.pixus.cc.resolved_at,dns.records.ec2-44-198-164-69.compute-1.amazonaws.com.record_type,dns.records.ec2-44-198-164-69.compute-1.amazonaws.com.resolved_at,dns.records.ressage.ca.record_type,dns.records.ressage.ca.resolved_at,dns.records.ec2-18-117-180-175.us-east-2.compute.amazonaws.com.record_type,dns.records.ec2-18-117-180-175.us-east-2.compute.amazonaws.com.resolved_at
0,0,47.111.144.178,2022-05-19T05:41:43.997083Z,2022-05-19T06:35:51.268269Z,2022-05-26T21:24:04.478Z,Asia,China,CN,Hangzhou,,Asia/Shanghai,Zhejiang,30.2994,120.1612,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,47.111.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,dns,DNS,2022-05-26T02:12:52.477551422Z,PERSPECTIVE_NTT,53.0,DNS,167.248.133.45,UDP,False,FORWARDING,"[{'name': 'ip.parrotdns.com.', 'response': '12...","[{'name': 'ip.parrotdns.com.', 'response': ';i...",True,512.0,0.0,SUCCESS,False,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,47.111.144.178,2022-05-19T05:41:43.997083Z,2022-05-19T06:35:51.268269Z,2022-05-26T21:24:04.478Z,Asia,China,CN,Hangzhou,,Asia/Shanghai,Zhejiang,30.2994,120.1612,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,47.111.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,http,HTTP,2022-05-25T14:30:01.225755844Z,PERSPECTIVE_ORANGE,80.0,HTTP,167.94.145.59,TCP,False,,,,,,,,,HTTP/1.1 200 OK\r\nServer: nginx/1.18.0 (Ubunt...,485454502f312e3120323030204f4b0d0a536572766572...,[{'uniform_resource_identifier': 'cpe:2.3:a:ng...,DISPLAY_UTF8,DISPLAY_HEX,GET,http://47.111.144.178/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,0,47.111.144.178,2022-05-19T05:41:43.997083Z,2022-05-19T06:35:51.268269Z,2022-05-26T21:24:04.478Z,Asia,China,CN,Hangzhou,,Asia/Shanghai,Zhejiang,30.2994,120.1612,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,47.111.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,http,HTTP,2022-05-26T08:49:10.234471676Z,PERSPECTIVE_TATA,81.0,HTTP,167.94.138.63,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,http://47.111.144.178:81/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,47.111.144.178,2022-05-19T05:41:43.997083Z,2022-05-19T06:35:51.268269Z,2022-05-26T21:24:04.478Z,Asia,China,CN,Hangzhou,,Asia/Shanghai,Zhejiang,30.2994,120.1612,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,47.111.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,banner_grab,NETBIOS,2022-05-26T03:28:26.338664995Z,PERSPECTIVE_HE,137.0,NETBIOS,162.142.125.220,UDP,False,,,,,,,,,�؄�������� CKAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA��...,e5d88400000000010000000020434b4141414141414141...,,DISPLAY_UTF8,DISPLAY_HEX,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,47.111.144.178,2022-05-19T05:41:43.997083Z,2022-05-19T06:35:51.268269Z,2022-05-26T21:24:04.478Z,Asia,China,CN,Hangzhou,,Asia/Shanghai,Zhejiang,30.2994,120.1612,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,47.111.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,http,HTTPS,2022-05-26T15:43:13.988260902Z,PERSPECTIVE_NTT,443.0,HTTP,167.248.133.45,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://47.111.144.178/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,0,39.98.48.153,2022-05-21T01:17:58.762444Z,2022-05-21T01:16:49.548624Z,2022-05-26T13:13:11.489Z,Asia,China,CN,Beijing,,Asia/Shanghai,Beijing,39.9075,116.3972,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,39.96.0.0/14,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,http,HTTP,2022-05-26T13:09:23.997841695Z,PERSPECTIVE_HE,8899.0,HTTP,162.142.125.10,TCP,False,,,,,,,,,HTTP/1.1 200 OK\r\nDate: <REDACTED>\nServer: ...,485454502f312e3120323030204f4b0d0a446174653a20...,[{'uniform_resource_identifier': 'cpe:2.3:a:ap...,DISPLAY_UTF8,DISPLAY_HEX,GET,http://39.98.48.153:8899/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,39.98.48.153,2022-05-21T01:17:58.762444Z,2022-05-21T01:16:49.548624Z,2022-05-26T13:13:11.489Z,Asia,China,CN,Beijing,,Asia/Shanghai,Beijing,39.9075,116.3972,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,39.96.0.0/14,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,banner_grab,UNKNOWN,2022-05-26T13:10:51.146615808Z,PERSPECTIVE_HE,55588.0,UNKNOWN,162.142.125.221,TCP,False,,,,,,,,,�\n,1503030002020a,[{'uniform_resource_identifier': 'cpe:2.3:o:*:...,DISPLAY_UTF8,DISPLAY_HEX,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,0,39.98.48.153,2022-05-21T01:17:58.762444Z,2022-05-21T01:16:49.548624Z,2022-05-26T13:13:11.489Z,Asia,China,CN,Beijing,,Asia/Shanghai,Beijing,39.9075,116.3972,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,39.96.0.0/14,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,http,HTTP,2022-05-26T13:05:25.000315279Z,PERSPECTIVE_HE,65480.0,HTTP,162.142.125.222,TCP,False,,,,,,,,,HTTP/1.1 200 OK\r\nDate: <REDACTED>\nServer: ...,485454502f312e3120323030204f4b0d0a446174653a20...,[{'uniform_resource_identifier': 'cpe:2.3:a:ap...,DISPLAY_UTF8,DISPLAY_HEX,GET,http://39.98.48.153:65480/login,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,0,107.172.140.180,2022-05-14T20:08:50.075481Z,2022-05-14T08:55:16.769245Z,2022-05-26T19:15:06.334Z,North America,United States,US,,,America/Chicago,,37.7510,-97.8220,United States,US,36352,AS-COLOCROSSING,107.172.140.0/22,AS-COLOCROSSING,US,cpe:2.3:o:canonical:ubuntu_linux:14.04:*:*:*:*...,o,Linux,,ssh,SSH,2022-05-26T18:59:56.788657889Z,PERSPECTIVE_ORANGE,22.0,SSH,167.94.145.57,TCP,False,,,,,,,,,SSH-2.0-OpenSSH_6.6.1p1 Ubuntu-2ubuntu2.13,5353482d322e302d4f70656e5353485f362e362e317031...,"[{'product': 'openssh', 'other': {'comment': '...",DISPLAY_UTF8,DISPLAY_HEX,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


### Example Data Analysis using subsetting and the .filter and .value_counts methods

In [7]:
# Selecting part of a dataframe is often referred to as "subsetting".
# For more documentation on subsetting, please consult
# https://pandas.pydata.org/docs/getting_started/intro_tutorials/03_subset_data.html
# Here we build a boolean mask that is True for rows whose port is 443 and keep only those rows.
is_port_443 = censys_output["port"] == 443
https_df = censys_output[is_port_443]

https_df

Unnamed: 0,index,ip,location_updated_at,autonomous_system_updated_at,last_updated_at,location.continent,location.country,location.country_code,location.city,location.postal_code,location.timezone,location.province,location.coordinates.latitude,location.coordinates.longitude,location.registered_country,location.registered_country_code,autonomous_system.asn,autonomous_system.description,autonomous_system.bgp_prefix,autonomous_system.name,autonomous_system.country_code,operating_system.uniform_resource_identifier,operating_system.part,operating_system.product,operating_system.source,_decoded,extended_service_name,observed_at,perspective_id,port,service_name,source_ip,transport_protocol,truncated,dns.server_type,dns.answers,dns.questions,dns.edns.do,dns.edns.udp,dns.edns.version,dns.r_code,dns.resolves_correctly,banner,banner_hex,software,_encoding.banner,_encoding.banner_hex,http.request.method,http.request.uri,http.request.headers.User_Agent,...,dns.records.lovtiktok.com.record_type,dns.records.lovtiktok.com.resolved_at,dns.records.monadore.top.record_type,dns.records.monadore.top.resolved_at,dns.records.tropic.kim.record_type,dns.records.tropic.kim.resolved_at,http.response.headers.Access_Control_Expose_Headers,http.response.headers._encoding.Access_Control_Expose_Headers,http.response.headers._encoding.X_Cloud_Trace_Context,http.response.headers._encoding.Access_Control_Allow_Methods,http.response.headers._encoding.Access_Control_Allow_Headers,http.response.headers.X_Cloud_Trace_Context,http.response.headers.Access_Control_Allow_Methods,http.response.headers.Access_Control_Allow_Headers,dns.records.18.102.85.34.bc.googleusercontent.com.record_type,dns.records.18.102.85.34.bc.googleusercontent.com.resolved_at,dns.records.ec2-18-167-12-189.ap-east-1.compute.amazonaws.com.record_type,dns.records.ec2-18-167-12-189.ap-east-1.compute.amazonaws.com.resolved_at,dns.records.ec2-3-93-190-179.compute-1.amazonaws.com.record_type,dns.records.ec2-3-93-190-179.compute-1.amazonaws.
com.resolved_at,dns.records.dts.x2yz.cn.record_type,dns.records.dts.x2yz.cn.resolved_at,dns.records.53.177.194.35.bc.googleusercontent.com.record_type,dns.records.53.177.194.35.bc.googleusercontent.com.resolved_at,dns.records.ec2-13-40-124-245.eu-west-2.compute.amazonaws.com.record_type,dns.records.ec2-13-40-124-245.eu-west-2.compute.amazonaws.com.resolved_at,dns.records.us.1pixel.xyz.record_type,dns.records.us.1pixel.xyz.resolved_at,dns.records.fucku.wdnmd.info.record_type,dns.records.fucku.wdnmd.info.resolved_at,dns.records.will.shadow.international.record_type,dns.records.will.shadow.international.resolved_at,dns.records.ec2-54-163-158-61.compute-1.amazonaws.com.record_type,dns.records.ec2-54-163-158-61.compute-1.amazonaws.com.resolved_at,dns.records.remote.nedsecures.co.za.record_type,dns.records.remote.nedsecures.co.za.resolved_at,dns.records.149-28-42-139.ipv4.nknlabs.io.record_type,dns.records.149-28-42-139.ipv4.nknlabs.io.resolved_at,dns.records.microstarltd.space.record_type,dns.records.microstarltd.space.resolved_at,dns.records.ec2-13-208-175-172.ap-northeast-3.compute.amazonaws.com.record_type,dns.records.ec2-13-208-175-172.ap-northeast-3.compute.amazonaws.com.resolved_at,dns.records.c2.pixus.cc.record_type,dns.records.c2.pixus.cc.resolved_at,dns.records.ec2-44-198-164-69.compute-1.amazonaws.com.record_type,dns.records.ec2-44-198-164-69.compute-1.amazonaws.com.resolved_at,dns.records.ressage.ca.record_type,dns.records.ressage.ca.resolved_at,dns.records.ec2-18-117-180-175.us-east-2.compute.amazonaws.com.record_type,dns.records.ec2-18-117-180-175.us-east-2.compute.amazonaws.com.resolved_at
4,0,47.111.144.178,2022-05-19T05:41:43.997083Z,2022-05-19T06:35:51.268269Z,2022-05-26T21:24:04.478Z,Asia,China,CN,Hangzhou,,Asia/Shanghai,Zhejiang,30.2994,120.1612,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,47.111.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,http,HTTPS,2022-05-26T15:43:13.988260902Z,PERSPECTIVE_NTT,443.0,HTTP,167.248.133.45,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://47.111.144.178/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,0,121.41.116.164,2022-05-24T20:50:15.692600Z,2022-05-24T20:50:15.669053Z,2022-05-26T17:50:44.738Z,Asia,China,CN,Hangzhou,,Asia/Shanghai,Zhejiang,30.2994,120.1612,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,121.40.0.0/15,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,,,,,http,HTTPS,2022-05-26T17:50:36.717695067Z,PERSPECTIVE_TATA,443.0,HTTP,167.94.138.63,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://121.41.116.164/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,0,121.5.189.24,2022-05-16T02:28:22.809125Z,2022-05-16T02:28:22.797490Z,2022-05-26T20:03:00.814Z,Asia,China,CN,,,Asia/Shanghai,,34.7732,113.7220,China,CN,45090,TENCENT-NET-AP Shenzhen Tencent Computer Syste...,121.5.128.0/18,TENCENT-NET-AP Shenzhen Tencent Computer Syste...,CN,cpe:2.3:o:canonical:ubuntu_linux:18.04:*:*:*:*...,o,Linux,,http,HTTPS,2022-05-26T19:33:11.243358967Z,PERSPECTIVE_HE,443.0,HTTP,162.142.125.222,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://121.5.189.24/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,198.199.64.153,2022-05-16T01:07:54.984277Z,2022-05-16T01:07:54.997944Z,2022-05-26T14:58:32.394Z,North America,United States,US,North Bergen,07047,America/New_York,New Jersey,40.7930,-74.0247,United States,US,14061,DIGITALOCEAN-ASN,198.199.64.0/20,DIGITALOCEAN-ASN,US,cpe:2.3:o:canonical:ubuntu_linux:20.04:*:*:*:*...,o,Linux,,http,HTTPS,2022-05-26T14:58:32.165277635Z,PERSPECTIVE_HE,443.0,HTTP,162.142.125.222,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://198.199.64.153/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,175.24.227.223,2022-05-21T04:46:31.701288Z,2022-05-21T04:46:31.704673Z,2022-05-26T09:47:48.917Z,Asia,China,CN,,,Asia/Shanghai,,34.7732,113.7220,China,CN,45090,TENCENT-NET-AP Shenzhen Tencent Computer Syste...,175.24.192.0/18,TENCENT-NET-AP Shenzhen Tencent Computer Syste...,CN,,,,,http,HTTPS,2022-05-26T08:15:46.656082269Z,PERSPECTIVE_NTT,443.0,HTTP,167.248.133.118,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://175.24.227.223/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0,47.90.202.152,2022-05-14T04:26:53.983018Z,2022-05-14T04:26:53.966167Z,2022-05-26T17:47:24.513Z,North America,United States,US,,,America/New_York,Virginia,38.6583,-77.2481,United States,US,45102,"ALIBABA-CN-NET Alibaba US Technology Co., Ltd.",47.90.192.0/18,"ALIBABA-CN-NET Alibaba US Technology Co., Ltd.",CN,,,,,http,HTTPS,2022-05-26T13:25:24.987740004Z,PERSPECTIVE_TATA,443.0,HTTP,167.94.138.120,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://47.90.202.152/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,0,129.226.15.142,2022-05-14T22:33:20.745356Z,2022-05-15T00:43:08.702576Z,2022-05-26T17:16:58.741Z,Asia,Hong Kong,HK,Central,,Asia/Hong_Kong,Central and Western District,22.2908,114.1501,Singapore,SG,132203,"TENCENT-NET-AP-CN Tencent Building, Kejizhongy...",129.226.14.0/23,"TENCENT-NET-AP-CN Tencent Building, Kejizhongy...",CN,cpe:2.3:o:canonical:ubuntu_linux:20.04:*:*:*:*...,o,Linux,,http,HTTPS,2022-05-25T14:23:40.677821242Z,PERSPECTIVE_NTT,443.0,HTTP,167.248.133.62,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://129.226.15.142/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,78.128.113.14,2022-05-12T12:09:52.792946Z,2022-05-19T06:07:47.276233Z,2022-05-25T22:21:00.007Z,Europe,Bulgaria,BG,,,Europe/Sofia,,42.6960,23.3320,Bulgaria,BG,209160,MITI2000,78.128.113.0/24,MITI2000,BG,cpe:2.3:o:canonical:ubuntu_linux:18.04:*:*:*:*...,o,Linux,,http,HTTPS,2022-05-25T06:08:03.194490003Z,PERSPECTIVE_HE,443.0,HTTP,162.142.125.221,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://78.128.113.14/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,0,39.98.48.153,2022-05-21T01:17:58.762444Z,2022-05-21T01:16:49.548624Z,2022-05-26T13:13:11.489Z,Asia,China,CN,Beijing,,Asia/Shanghai,Beijing,39.9075,116.3972,China,CN,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,39.96.0.0/14,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN,cpe:2.3:o:*:linux:*:*:*:*:*:*:*:*,o,linux,OSI_TRANSPORT_LAYER,http,HTTPS,2022-05-26T05:19:47.542081942Z,PERSPECTIVE_HE,443.0,HTTP,162.142.125.210,TCP,False,,,,,,,,,HTTP/1.1 404 Not Found\r\nDate: <REDACTED>\nC...,485454502f312e3120343034204e6f7420466f756e640d...,,DISPLAY_UTF8,DISPLAY_HEX,GET,https://39.98.48.153/,[Mozilla/5.0 (compatible; CensysInspect/1.1; +...,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [8]:
# The .filter method allows powerful subsetting based on the names of rows or columns:
# https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.filter.html
# Here we use a regex to keep only columns whose name begins with 'http', 'ip', or 'port'.
# Then we remove the columns relating to the http request and to encoding using the .drop method;
# for more on .drop, see https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.drop.html
# The patterns are raw strings so that '\.' reaches the regex engine as an escaped literal dot
# instead of being parsed as an (invalid) Python string escape.

filtered_df = https_df.filter(regex=r'(^http|^ip|^port)', axis=1)
filtered_df = filtered_df.drop(columns=filtered_df.filter(regex=r'^http\.request').columns)
filtered_df = filtered_df.drop(columns=filtered_df.filter(regex=r'_encoding').columns)

In [9]:
# Show the port-443 rows restricted to the ip, port and http response columns selected above.
filtered_df

Unnamed: 0,ip,port,http.response.protocol,http.response.status_code,http.response.status_reason,http.response.headers.Etag,http.response.headers.Connection,http.response.headers.Content_Type,http.response.headers.Server,http.response.headers.Last_Modified,http.response.headers.Date,http.response.html_tags,http.response.body_size,http.response.body,http.response.body_hash,http.response.html_title,http.supports_http2,http.response.headers.Content_Length,http.response.headers.Accept_Ranges,http.response.headers.Vary,http.response.headers.Strict_Transport_Security,http.response.headers.X_Xss_Protection,http.response.headers.X_Content_Type_Options,http.response.headers.X_Frame_Options,http.response.headers.Content_Security_Policy,http.response.headers.Upgrade,http.response.headers.Set_Cookie,http.response.headers.Keep_Alive,http.response.headers.Www_Authenticate,http.response.headers.X_Powered_By,http.response.headers.Content_Language,http.response.headers.Pragma,http.response.headers.Expires,http.response.headers.Cache_Control,http.response.headers.Access_Control_Allow_Credentials,http.response.headers.Access_Control_Allow_Origin,http.response.headers.Location,http.response.headers.X_Permitted_Cross_Domain_Policies,http.response.headers.X_Runtime,http.response.headers.X_Download_Options,http.response.headers.X_Request_Id,http.response.headers.Link,http.response.headers.Referrer_Policy,http.response.headers.X_Proxy_Error_Type,http.response.headers.Access_Control_Expose_Headers,http.response.headers.X_Cloud_Trace_Context,http.response.headers.Access_Control_Allow_Methods,http.response.headers.Access_Control_Allow_Headers
4,47.111.144.178,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
0,121.41.116.164,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,121.5.189.24,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,198.199.64.153,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,175.24.227.223,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,47.90.202.152,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,129.226.15.142,443.0,HTTP/1.1,404.0,Not Found,,,[text/html],,,[<REDACTED>],"[<title>Error 404 (Not Found)!!1</title>, <met...",1758.0,"<!doctype html>\n<html lang=""en"">\n<head>\n ...",sha1:f2e4876818b61fe01175b06a6dcc92dcbf95206b,Error 404 (Not Found)!!1,False,[1758],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,78.128.113.14,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,39.98.48.153,443.0,HTTP/1.1,404.0,Not Found,,,[text/plain],,,[<REDACTED>],,0.0,,,,False,[0],,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [10]:
# value_counts returns a Series of the unique values, ordered from most to least frequent
https_server_counts = filtered_df['http.response.headers.Server'].value_counts()
https_server_counts

[nginx/1.2.5]               2
[nginx/1.14.0 (Ubuntu)]     1
[Apache/2.4.41 (Ubuntu)]    1
Name: http.response.headers.Server, dtype: int64

In [11]:
# frequency of each Content-Type response header among the port-443 rows
https_content_type_counts = filtered_df['http.response.headers.Content_Type'].value_counts()
https_content_type_counts

[text/plain]                  241
[text/html]                     7
[text/html; charset=UTF-8]      1
Name: http.response.headers.Content_Type, dtype: int64

In [12]:
# frequency of each HTTP response status code among the port-443 rows
https_status_code_counts = filtered_df['http.response.status_code'].value_counts()
https_status_code_counts

404.0    244
200.0      4
202.0      1
Name: http.response.status_code, dtype: int64

In [27]:
# Same subsetting approach as for port 443, this time keeping the rows whose port is 80,
# followed by the frequency of each Server response header in that subset.
is_port_80 = censys_output["port"] == 80
http_df = censys_output[is_port_80]
http_server_counts = http_df['http.response.headers.Server'].value_counts()
http_server_counts

[Apache/2.4.29 (Ubuntu)]                                                       48
[Apache/2.4.41 (Ubuntu)]                                                        6
[Apache]                                                                        5
[nginx/1.18.0 (Ubuntu)]                                                         3
[nginx]                                                                         2
[Microsoft-IIS/10.0]                                                            2
[nginx/1.14.1]                                                                  2
[Apache/2.4.39 (Win64) OpenSSL/1.1.1b mod_fcgid/2.3.9a mod_log_rotate/1.02]     2
[Apache/2.4.6 (CentOS)]                                                         1
[Apache/2.4.6 (CentOS) PHP/5.4.16]                                              1
[Apache/2.4.53 (Debian)]                                                        1
[Microsoft-HTTPAPI/2.0]                                                         1
[openresty/1.15.

In [28]:
# frequency of each Content-Type response header among the port-80 rows
http_content_type_counts = http_df['http.response.headers.Content_Type'].value_counts()
http_content_type_counts

[text/plain]                       81
[text/html]                        78
[text/html; charset=UTF-8]          6
[text/html; charset=iso-8859-1]     3
[text/html; charset=utf-8]          2
[text/plain; charset=utf-8]         1
[text/html; charset=us-ascii]       1
[text/html;charset=GB2312]          1
Name: http.response.headers.Content_Type, dtype: int64

In [29]:
# frequency of each HTTP response status code among the port-80 rows
http_status_code_counts = http_df['http.response.status_code'].value_counts()
http_status_code_counts

404.0    90
200.0    81
302.0     3
403.0     2
301.0     1
Name: http.response.status_code, dtype: int64

In [16]:
# Certificate view of the rows on port 443: keep only the columns whose name begins with
# 'tls.certificates' or 'ip', then drop every column whose name contains '_encoding' and
# every column under 'tls.certificates.leaf_data.public_key.'.
# The patterns are raw strings so that '\.' reaches the regex engine as an escaped literal dot
# instead of being parsed as an (invalid) Python string escape.
tls_df = censys_output[censys_output["port"] == 443]
tls_df = tls_df.filter(regex=r'(^tls\.certificates|^ip)', axis=1)
tls_df = tls_df.drop(columns=tls_df.filter(regex=r'_encoding').columns)
tls_df = tls_df.drop(columns=tls_df.filter(regex=r'tls\.certificates\.leaf_data\.public_key\.').columns)

In [17]:
# Show the ip and tls.certificates columns kept for the port-443 rows.
tls_df

Unnamed: 0,ip,tls.certificates.leaf_fp_sha_256,tls.certificates.leaf_data.subject_dn,tls.certificates.leaf_data.issuer_dn,tls.certificates.leaf_data.pubkey_bit_size,tls.certificates.leaf_data.pubkey_algorithm,tls.certificates.leaf_data.tbs_fingerprint,tls.certificates.leaf_data.fingerprint,tls.certificates.leaf_data.issuer.common_name,tls.certificates.leaf_data.issuer.locality,tls.certificates.leaf_data.issuer.organization,tls.certificates.leaf_data.issuer.organizational_unit,tls.certificates.leaf_data.issuer.province,tls.certificates.leaf_data.issuer.country,tls.certificates.leaf_data.subject.common_name,tls.certificates.leaf_data.subject.locality,tls.certificates.leaf_data.subject.organization,tls.certificates.leaf_data.subject.organizational_unit,tls.certificates.leaf_data.subject.province,tls.certificates.leaf_data.subject.country,tls.certificates.leaf_data.signature.self_signed,tls.certificates.leaf_data.signature.signature_algorithm,tls.certificates.leaf_data.names,tls.certificates.leaf_data.issuer.email_address,tls.certificates.leaf_data.subject.email_address,tls.certificates.chain_fps_sha_256,tls.certificates.chain
4,47.111.144.178,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,
0,121.41.116.164,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,
3,121.5.189.24,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,
1,198.199.64.153,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,
1,175.24.227.223,7b3c694bd0eee47b1d7facae0f85754cd3ca457bb4c4d2...,"C=US, ST=Washington, L=Redmod, O=Microsoft Cor...","C=US, ST=Washington, L=Redmod, O=Microsoft Cor...",2048.0,RSA,3ed10421f1a4f4218cdbc3f756404be91f9a930bdc4ac4...,7b3c694bd0eee47b1d7facae0f85754cd3ca457bb4c4d2...,[Microsoft Windows],[Redmod],[Microsoft Corporation],[MOPR],[Washington],[US],[Microsoft Windows],[Redmod],[Microsoft Corporation],[MOPR],[Washington],[US],True,SHA256-RSA,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,47.90.202.152,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,
4,129.226.15.142,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,
1,78.128.113.14,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,
1,39.98.48.153,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,"C=, ST=, L=, O=, OU=, CN=","C=, ST=, L=, O=, OU=, CN=",2048.0,RSA,408b467860242157e0fb0a87f3d7ca31a59a02be0a7ffe...,87f2085c32b6a2cc709b365f55873e207a9caa10bffecf...,[],[],[],[],[],[],[],[],[],[],[],[],True,SHA256-RSA,,,,,


In [18]:
# Build the JARM view: start from the rows whose service port is 443, keep only
# the `ip` column and the columns whose names start with `jarm`, then discard
# every remaining column whose name contains `_encoding`.
jarm_df = (
    censys_output
    .loc[censys_output["port"] == 443]
    .filter(regex='(^jarm|^ip)', axis=1)
)
jarm_df = jarm_df.drop(columns=jarm_df.filter(regex='_encoding').columns)

In [19]:
# Render the port-443 JARM subset (ip + jarm.* columns) as this cell's output.
jarm_df

Unnamed: 0,ip,jarm.fingerprint,jarm.cipher_and_version_fingerprint,jarm.tls_extensions_sha256,jarm.observed_at
4,47.111.144.178,07d14d16d21d21d00042d41d00041d47e4e0ae17960b2a...,07d14d16d21d21d00042d41d00041d,47e4e0ae17960b2a5b4fd6107fbb0926,2022-05-25T16:23:21.973242798Z
0,121.41.116.164,07d14d16d21d21d07c07d14d07d21d9b2f5869a6985368...,07d14d16d21d21d07c07d14d07d21d,9b2f5869a6985368a9dec764186a9175,2022-05-19T19:16:31.250470942Z
3,121.5.189.24,07d14d16d21d21d00042d41d00041d47e4e0ae17960b2a...,07d14d16d21d21d00042d41d00041d,47e4e0ae17960b2a5b4fd6107fbb0926,2022-05-09T21:32:59.665592146Z
1,198.199.64.153,07d14d16d21d21d00042d43d00041de5fb3038104f457d...,07d14d16d21d21d00042d43d00041d,e5fb3038104f457d92ba02e9311512c2,2022-05-15T16:07:15.612333600Z
1,175.24.227.223,,,,
...,...,...,...,...,...
0,47.90.202.152,07d14d16d21d21d07c42d41d00041d24a458a375eef0c5...,07d14d16d21d21d07c42d41d00041d,24a458a375eef0c576d23a7bab9a9fb1,2022-05-17T16:32:51.609732190Z
4,129.226.15.142,07d14d16d21d21d07c42d41d00041d24a458a375eef0c5...,07d14d16d21d21d07c42d41d00041d,24a458a375eef0c576d23a7bab9a9fb1,2022-05-18T17:01:00.630786494Z
1,78.128.113.14,,,,2022-05-11T19:37:51.846820330Z
1,39.98.48.153,07d14d16d21d21d07c42d41d00041d24a458a375eef0c5...,07d14d16d21d21d07c42d41d00041d,24a458a375eef0c576d23a7bab9a9fb1,2022-05-24T12:43:54.017549111Z


In [20]:
# Count how many port-443 rows share each JARM fingerprint, most common first.
# value_counts() skips missing values by default, so rows with no fingerprint
# are not represented in the tally.
jarm_df['jarm.fingerprint'].value_counts()

07d14d16d21d21d00042d43d00041de5fb3038104f457d92ba02e9311512c2    72
07d14d16d21d21d00042d41d00041d47e4e0ae17960b2a5b4fd6107fbb0926    52
2ad2ad16d2ad2ad00042d42d00042ddb04deffa1705e2edc44cae1ed24a4da    27
07d14d16d21d21d07c07d14d07d21d9b2f5869a6985368a9dec764186a9175    15
07d14d16d21d21d07c42d41d00041d24a458a375eef0c576d23a7bab9a9fb1    13
07d14d16d21d21d00042d41d00041de5fb3038104f457d92ba02e9311512c2     8
05d13d20d21d20d05c05d13d05d20dd7fc4c7c6ef19b77a4ca0787979cdc13     3
07d13d15d21d21d07c07d13d07d21dd7fc4c7c6ef19b77a4ca0787979cdc13     2
2ad2ad16d2ad2ad22c42d42d00042de4f6cde49b80ad1e14c340f9e47ccd3a     2
2ad2ad16d2ad2ad22c42d42d00042d58c7162162b6a603d3d90a2b76865b53     2
15d3fd16d29d29d00042d43d0000009ec686233a4398bea334ba5e62e34a01     1
05d02d20d21d20d05c05d02d05d20dd7fc4c7c6ef19b77a4ca0787979cdc13     1
1dd28d28d00028d00042d41d00041d02cacad88cde684d16d82eeb4da1108c     1
1dd40d40d00040d00042d43d00041d02cacad88cde684d16d82eeb4da1108c     1
Name: jarm.fingerprint, dtype: int64

In [21]:
# Build the SSH view: start from the rows whose service port is 22, keep only
# the `ip` column and the columns whose names start with `ssh`, then discard
# every remaining column whose name contains `_encoding` or `ecdsa_`.
ssh_df = (
    censys_output
    .loc[censys_output["port"] == 22]
    .filter(regex='(^ssh|^ip)', axis=1)
)
ssh_df = ssh_df.drop(columns=ssh_df.filter(regex='(_encoding|ecdsa_)').columns)

In [22]:
# Render the port-22 SSH subset (ip + ssh.* columns) as this cell's output.
ssh_df

Unnamed: 0,ip,ssh.endpoint_id.raw,ssh.endpoint_id.protocol_version,ssh.endpoint_id.software_version,ssh.endpoint_id.comment,ssh.kex_init_message.kex_algorithms,ssh.kex_init_message.host_key_algorithms,ssh.kex_init_message.client_to_server_ciphers,ssh.kex_init_message.server_to_client_ciphers,ssh.kex_init_message.client_to_server_macs,ssh.kex_init_message.server_to_client_macs,ssh.kex_init_message.client_to_server_compression,ssh.kex_init_message.server_to_client_compression,ssh.kex_init_message.first_kex_follows,ssh.algorithm_selection.kex_algorithm,ssh.algorithm_selection.host_key_algorithm,ssh.algorithm_selection.client_to_server_alg_group.cipher,ssh.algorithm_selection.client_to_server_alg_group.mac,ssh.algorithm_selection.client_to_server_alg_group.compression,ssh.algorithm_selection.server_to_client_alg_group.cipher,ssh.algorithm_selection.server_to_client_alg_group.mac,ssh.algorithm_selection.server_to_client_alg_group.compression,ssh.server_host_key.fingerprint_sha256,ssh.server_host_key.rsa_public_key.modulus,ssh.server_host_key.rsa_public_key.exponent,ssh.server_host_key.rsa_public_key.length
0,165.154.229.59,SSH-2.0-OpenSSH_7.9p1 Debian-10,2.0,OpenSSH_7.9p1,Debian-10,"[curve25519-sha256, curve25519-sha256@libssh.o...","[rsa-sha2-512, rsa-sha2-256, ssh-rsa, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,d1bcd6e678bada22ee4b4563edcbf66fe8352b44e705cc...,,,
0,114.220.176.200,SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.3,2.0,OpenSSH_8.2p1,Ubuntu-4ubuntu0.3,"[curve25519-sha256, curve25519-sha256@libssh.o...","[rsa-sha2-512, rsa-sha2-256, ssh-rsa, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,f111cb0f91744085cd4a34b991c75ed03aca14ed0e4a40...,,,
0,121.5.189.24,SSH-2.0-OpenSSH_7.6p1 Ubuntu-4ubuntu0.3,2.0,OpenSSH_7.6p1,Ubuntu-4ubuntu0.3,"[curve25519-sha256, curve25519-sha256@libssh.o...","[ssh-rsa, rsa-sha2-512, rsa-sha2-256, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,db1970b7dba2bd235871601616c01d7edef0ecb7ce49ea...,,,
0,204.44.109.84,SSH-2.0-OpenSSH_7.4,2.0,OpenSSH_7.4,,"[curve25519-sha256, curve25519-sha256@libssh.o...","[ssh-rsa, rsa-sha2-512, rsa-sha2-256, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,f98be49fb11bb7593fd45c8b8097700049384b898c7534...,,,
0,198.199.64.153,SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.4,2.0,OpenSSH_8.2p1,Ubuntu-4ubuntu0.4,"[curve25519-sha256, curve25519-sha256@libssh.o...","[rsa-sha2-512, rsa-sha2-256, ssh-rsa, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,b9a2e8f0f137b5e780ac4208eca09b9b4af2c181c671c6...,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,101.36.127.65,SSH-2.0-OpenSSH_7.6p1 Ubuntu-4ubuntu0.4,2.0,OpenSSH_7.6p1,Ubuntu-4ubuntu0.4,"[curve25519-sha256, curve25519-sha256@libssh.o...","[ssh-rsa, rsa-sha2-512, rsa-sha2-256, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,f20898f6e62d514a42616d8ab92ae13bca50a5b9d93739...,,,
0,42.192.80.90,SSH-2.0-OpenSSH_7.4,2.0,OpenSSH_7.4,,"[curve25519-sha256, curve25519-sha256@libssh.o...","[ssh-rsa, rsa-sha2-512, rsa-sha2-256, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,1ad199df33294d6f4683592079c93b8770a3f8d64db227...,,,
0,129.226.15.142,SSH-2.0-OpenSSH_8.2p1 Ubuntu-4ubuntu0.3,2.0,OpenSSH_8.2p1,Ubuntu-4ubuntu0.3,"[curve25519-sha256, curve25519-sha256@libssh.o...","[rsa-sha2-512, rsa-sha2-256, ssh-rsa, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,fcaa2e394b90f3b2d373ec2462006fd71933c5c8cc7d98...,,,
0,78.128.113.14,SSH-2.0-OpenSSH_7.6p1 Ubuntu-4ubuntu0.3,2.0,OpenSSH_7.6p1,Ubuntu-4ubuntu0.3,"[curve25519-sha256, curve25519-sha256@libssh.o...","[ssh-rsa, rsa-sha2-512, rsa-sha2-256, ecdsa-sh...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[chacha20-poly1305@openssh.com, aes128-ctr, ae...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[umac-64-etm@openssh.com, umac-128-etm@openssh...","[none, zlib@openssh.com]","[none, zlib@openssh.com]",False,curve25519-sha256@libssh.org,ecdsa-sha2-nistp256,aes128-ctr,hmac-sha2-256,none,aes128-ctr,hmac-sha2-256,none,a9ec9ded568b1e34b167284bf4ab8a657dbe269640cf2d...,,,


In [30]:
# Count how many port-22 rows present each SSH host-key SHA-256 fingerprint,
# most common first; a count above 1 means the same host key appears on more
# than one row of ssh_df.
ssh_df['ssh.server_host_key.fingerprint_sha256'].value_counts()

8d1798c27b2381fcbffb19abb5bac757052f4fa9da8b246969392c7f4a6b34fd    4
d1bcd6e678bada22ee4b4563edcbf66fe8352b44e705ccea46af5ab403caa353    1
06bc61abab67b49401145609802e493c5ad2090b97e2ed8f9e15664ea8fadd93    1
cbb7a6ff4880792d8f4b9d8ff757f6b6b2f001b034eaf3c646c167069c3195f1    1
e136dea889cdeb3fc03a2250b6982e88d1efbcb806f7b354170527e07623b4a4    1
                                                                   ..
5b18213e140a7f1f62094f67caec711b31c84b1a056964f3895575046bb9ff9d    1
93dd5186e6f1a3a53ad2e21db81afd3f05b7ef9822dedfbb30f10b465039dd2d    1
6e33da57f69daaf36d9c6390e55063b0fe18a59d69716a6ee191c48a3f7d65d6    1
e7ad0679987214436f86fe6d0cfa504b627b02528f9f75fd0bd0b4ad5d876c7f    1
5455fb565e6103fec3635e2fb205cbc8d8854e9096beb30bd20b2f300339e4f3    1
Name: ssh.server_host_key.fingerprint_sha256, Length: 122, dtype: int64

In [23]:
# Build the ASN view: keep the `ip` column plus every column whose name starts
# with `autonomous_system`, then collapse to one row per IP address
# (drop_duplicates keeps the first row encountered for each `ip`).
asn_df = (
    censys_output
    .filter(regex='(^autonomous_system|^ip)', axis=1)
    .drop_duplicates(subset='ip')
)

In [24]:
# Render the per-IP autonomous-system table as this cell's output.
asn_df

Unnamed: 0,ip,autonomous_system_updated_at,autonomous_system.asn,autonomous_system.description,autonomous_system.bgp_prefix,autonomous_system.name,autonomous_system.country_code
0,47.111.144.178,2022-05-19T06:35:51.268269Z,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,47.111.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN
0,165.154.229.59,2022-05-26T08:28:31.387748Z,142002,SCLOUDPTELTD-AS Scloud Pte Ltd,165.154.229.0/24,SCLOUDPTELTD-AS Scloud Pte Ltd,SG
0,114.220.176.200,2022-05-21T12:17:33.080326Z,4134,"CHINANET-BACKBONE No.31,Jin-rong Street",114.216.0.0/13,"CHINANET-BACKBONE No.31,Jin-rong Street",CN
0,121.41.116.164,2022-05-24T20:50:15.669053Z,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,121.40.0.0/15,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN
0,120.24.63.15,2022-05-16T00:02:26.853856Z,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,120.24.0.0/16,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN
...,...,...,...,...,...,...,...
0,47.90.202.152,2022-05-14T04:26:53.966167Z,45102,"ALIBABA-CN-NET Alibaba US Technology Co., Ltd.",47.90.192.0/18,"ALIBABA-CN-NET Alibaba US Technology Co., Ltd.",CN
0,129.226.15.142,2022-05-15T00:43:08.702576Z,132203,"TENCENT-NET-AP-CN Tencent Building, Kejizhongy...",129.226.14.0/23,"TENCENT-NET-AP-CN Tencent Building, Kejizhongy...",CN
0,78.128.113.14,2022-05-19T06:07:47.276233Z,209160,MITI2000,78.128.113.0/24,MITI2000,BG
0,39.98.48.153,2022-05-21T01:16:49.548624Z,37963,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,39.96.0.0/14,ALIBABA-CN-NET Hangzhou Alibaba Advertising Co...,CN


In [25]:
# Count rows per ASN, most common first. asn_df holds one row per IP, so each
# count is the number of distinct IPs announced by that autonomous system.
asn_df['autonomous_system.asn'].value_counts()

45090     118
37963      42
14061      15
132203      9
36352       7
         ... 
35251       1
4538        1
51659       1
202325      1
209160      1
Name: autonomous_system.asn, Length: 62, dtype: int64

In [26]:
# Count rows per announced BGP prefix, most common first. asn_df holds one row
# per IP, so each count is the number of distinct IPs inside that prefix.
asn_df['autonomous_system.bgp_prefix'].value_counts()

114.132.64.0/18     17
114.132.0.0/18       9
43.138.128.0/18      6
121.40.0.0/15        5
43.138.192.0/18      4
                    ..
46.161.27.0/24       1
159.65.128.0/20      1
45.136.244.0/23      1
198.144.176.0/21     1
107.172.140.0/22     1
Name: autonomous_system.bgp_prefix, Length: 224, dtype: int64