# Map Apache Logs

### Pull Logs and GeoIP DB

In [1]:
! curl -L http://www.almhuette-raith.at/apache-log/access.log -o access.log
! curl -L https://git.io/GeoLite2-City.mmdb -o GeoLite2-City.mmdb 
! pip3 install geoip2

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  119M  100  119M    0     0  5161k      0  0:00:23  0:00:23 --:--:-- 5458k
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100   152  100   152    0     0    232      0 --:--:-- --:--:-- --:--:--   232
100   626  100   626    0     0    755      0 --:--:-- --:--:-- --:--:--   755
100 70.3M  100 70.3M    0     0  9789k      0  0:00:07  0:00:07 --:--:-- 11.1M
[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m


### Import Libraries

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
import geoip2.database

### Read Logs into a DataFrame

In [3]:
# Split on a whitespace character only when it is outside double quotes
# (an even number of quotes remains to the end of the line) and not inside
# square brackets, so quoted fields and the bracketed field stay whole.
log_field_sep = r'\s(?=(?:[^"]*"[^"]*")*[^"]*$)(?![^\[]*\])'

# Field position in each log line -> column name; positions that are not
# listed here are dropped while parsing.
log_columns = {
    0: 'ip',
    3: 'time',
    4: 'request',
    5: 'status',
    6: 'size',
    7: 'referer',
    8: 'user_agent',
}

# The python engine is required for a regex separator; '-' is read as missing.
df = pd.read_csv(
    'access.log',
    sep=log_field_sep,
    engine='python',
    header=None,
    usecols=list(log_columns),
    names=list(log_columns.values()),
    na_values='-',
)

### Build DataFrame of Request Counts per IP

In [4]:
# Only IPs with more than this many requests are kept.
MIN_REQUESTS = 50

# Count requests per IP, keep the busy ones, and make sure 'ip' is a string
# column. Built as a single chain so a fresh frame is produced each time:
# the original assigned a column on a boolean-filtered slice, which triggers
# pandas' SettingWithCopyWarning.
ip_info = (
    df['ip']
    .value_counts()
    .rename_axis('ip')
    .reset_index(name='counts')
    .loc[lambda frame: frame['counts'] > MIN_REQUESTS]
    .assign(ip=lambda frame: frame['ip'].astype('str'))
)
ip_info

Unnamed: 0,ip,counts
0,193.106.31.130,340874
1,197.52.128.37,40777
2,37.239.22.2,16101
3,45.133.1.60,7514
4,173.255.176.5,5220
...,...,...
779,45.242.86.62,51
780,188.43.35.5,51
781,109.252.122.160,51
782,194.5.53.112,51


### Build GeoIP DataFrame

In [5]:
def get_latitude(ip):
    """Return the latitude recorded for ``ip`` in ``GeoLite2-City.mmdb``.

    Parameters
    ----------
    ip : str
        IP address passed to the reader's ``city`` lookup.

    Returns
    -------
    The ``location.latitude`` value of the city response, or NaN when
    opening the database or the lookup raises.
    """
    try:
        # The context manager closes the database file after the lookup;
        # previously a new Reader was opened per call and never closed.
        with geoip2.database.Reader('GeoLite2-City.mmdb') as reader:
            return reader.city(ip).location.latitude
    except Exception:
        # Best-effort lookup: any failure maps to NaN. `pd.np` no longer
        # exists in pandas 2.x, so a plain float NaN is used instead.
        return float('nan')


def get_longitude(ip):
    """Return the longitude recorded for ``ip`` in ``GeoLite2-City.mmdb``.

    Parameters
    ----------
    ip : str
        IP address passed to the reader's ``city`` lookup.

    Returns
    -------
    The ``location.longitude`` value of the city response, or NaN when
    opening the database or the lookup raises.
    """
    try:
        # The context manager closes the database file after the lookup;
        # previously a new Reader was opened per call and never closed.
        with geoip2.database.Reader('GeoLite2-City.mmdb') as reader:
            return reader.city(ip).location.longitude
    except Exception:
        # Best-effort lookup: any failure maps to NaN. `pd.np` no longer
        # exists in pandas 2.x, so a plain float NaN is used instead.
        return float('nan')

def get_country(ip):
    """Return the country ISO code recorded for ``ip`` in ``GeoLite2-City.mmdb``.

    Parameters
    ----------
    ip : str
        IP address passed to the reader's ``city`` lookup.

    Returns
    -------
    ``str(response.country.iso_code)`` for the city response, or NaN when
    opening the database or the lookup raises.
    """
    try:
        # The context manager closes the database file after the lookup;
        # previously a new Reader was opened per call and never closed.
        with geoip2.database.Reader('GeoLite2-City.mmdb') as reader:
            # NOTE(review): str() is kept from the original; if iso_code is
            # None this yields the literal string 'None' rather than NaN.
            return str(reader.city(ip).country.iso_code)
    except Exception:
        # Best-effort lookup: any failure maps to NaN. `pd.np` no longer
        # exists in pandas 2.x, so a plain float NaN is used instead.
        return float('nan')

# Look each distinct IP up once: a Series indexed by IP whose values are the
# same IPs, so `.apply(...)` yields an IP -> value lookup table.
unique_ips = ip_info['ip'].unique()
unique_ips = pd.Series(unique_ips, index=unique_ips)

# `assign` returns a new frame instead of writing columns into `ip_info` in
# place, which avoids SettingWithCopyWarning when `ip_info` is a filtered
# slice and keeps the cell safe to re-run.
ip_info = ip_info.assign(
    Latitude=ip_info['ip'].map(unique_ips.apply(get_latitude)),
    Longitude=ip_info['ip'].map(unique_ips.apply(get_longitude)),
    Country=ip_info['ip'].map(unique_ips.apply(get_country)),
)

### Build Map

In [6]:
# Base map and a marker cluster layer attached to it.
# NOTE(review): the name `map` shadows the Python builtin; it is kept so
# that any later cell referring to it keeps working.
map = folium.Map(location=[50, 0], tiles='cartodb positron', zoom_start=4)

mcluster = MarkerCluster().add_to(map)

# Failed lookups leave NaN coordinates, and folium refuses to place a marker
# at a NaN location, so only rows with both coordinates are plotted.
located_ips = ip_info.dropna(subset=['Latitude', 'Longitude'])

# One clustered marker per IP, with the IP and its request count as popup.
for row in located_ips.itertuples(index=False):
    popup = "IP: " + row.ip + "\n Count: " + str(row.counts)
    folium.Marker(location=[row.Latitude, row.Longitude], popup=popup).add_to(mcluster)

map