# Data Flow

## Sensor Inputs

In [1]:
import pandas as pd
import sympy as sp
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import InterpolatedUnivariateSpline

### Populations

In [2]:
populations = pd.read_csv('./data/london-population-projection_csv.csv')

df = pd.DataFrame(populations[0:31])
df.rename(columns={'Value':'Population'},inplace=True)


In [3]:
def CAGR(start,finish,time):
    ratio = (finish/start)**(1/time)-1
    return 1+ratio
    
def extrapolate(start,time,CAGR,random_factor):
    start_arr = np.linspace(start,start,time)
    upToTime = np.linspace(0,time-1,time)
    CAGR_arr = np.linspace(CAGR,CAGR,time)
    rand_scale = rand_scaling_between(time,random_factor)
    return start_arr*CAGR_arr**upToTime*rand_scale

def rand_scaling_between(length,factor):
    rand_arr = (np.random.rand(length-2)-0.5)*factor+1
    rand_arr = np.append(rand_arr,1)
    rand_arr = np.insert(rand_arr, 0, 1., axis=0)
    return rand_arr


### CCTV Cameras

In [4]:
# Best figures are from 2002
# 1 in 14 in 2002 to 1 in 11 in 2020

cameras_2020 = 11
cameras_2002 = 14
t=18

cameras_CAGR = CAGR(cameras_2002,cameras_2020,18)

camera_ratios = extrapolate(cameras_2002,39,cameras_CAGR,0.05)

cameras = df['Population']/camera_ratios[8:]

df.insert(2, "Cameras", cameras, True)



### Wearables

In [5]:
# https://www-statista-com.iclibezp1.cc.ic.ac.uk/statistics/490231/wearable-devices-worldwide-by-region/
wearables = pd.read_csv('./data/wearables_western_europe.csv')
wearables_2015 = wearables.iloc[1,1]
wearables_2022 = wearables.iloc[-1,1]
t_wearables=wearables.size-1

CAGR_wearables = CAGR(wearables_2015,wearables_2022,t_wearables)
wearables_ex_up = extrapolate(wearables_2022,19,CAGR_wearables,0.05).tolist()
wearables_ex_down = np.flip(extrapolate(wearables_2022,4,(2-CAGR_wearables),0.05)).tolist()

wearables_list =  wearables_ex_down+wearables['Number Connected Wearables (millions)'].tolist() + wearables_ex_up

# Scale for London
wearables_np = np.array(wearables_list)/197*8990000

df.insert(3, "Wearables", wearables_np, True)



### Smart Phones

Log Curve fitted to data in Excel and noisy data extrapolated 2021 to 2041

In [6]:
# https://www-statista-com.iclibezp1.cc.ic.ac.uk/statistics/271851/smartphone-owners-in-the-united-kingdom-uk-by-age/
phones = pd.read_csv('./data/smartphone_ownership.csv')

mean_phone_ownership = phones.iloc[:,1:5].mean(axis=1)

phones = mean_phone_ownership*df['Population']/100

df.insert(4, "Phones", phones, True)



In [7]:
df

Unnamed: 0,Year,Population,Cameras,Wearables,Phones
0,2011-01-01,8217475.0,642057.3,6649692.0,3245903.0
1,2012-01-01,8321035.0,664658.3,7345544.0,3910886.0
2,2013-01-01,8428809.0,671985.6,8000020.0,5099429.0
3,2014-01-01,8550555.0,712154.5,8761827.0,6049518.0
4,2015-01-01,8685178.0,742955.6,764378.2,6665874.0
5,2016-01-01,8798957.0,751711.6,2477954.0,6731202.0
6,2017-01-01,8904004.0,757199.6,4015838.0,7279023.0
7,2018-01-01,9006352.0,800543.0,4347144.0,7475272.0
8,2019-01-01,9106157.0,790284.4,5019797.0,7694703.0
9,2020-01-01,9203331.0,832524.2,5824790.0,8329015.0


### Calculating the daily availability of data from each sensor

#### CCTV

In [8]:
# Averge person in UK seen 70 times a day = 2016
# https://en.wikipedia.org/wiki/Mass_surveillance_in_the_United_Kingdom#cite_note-Guardian2011-25

pop_2041 = df.loc[30,'Population']
cameras_2016 = df.loc[6,'Cameras']
cameras_2041 = df.loc[30,'Cameras']
# Ratio Camera UK to London 1/32:1/11
sightings_2020 = 70*32/11
sightings_2041 = sightings_2020*cameras_2016/cameras_2041

# let 1 sighting = 20s data
# video per person per day

CCTV_per_day = sightings_2041*20

# This is almost 10 minutes per day - 9m 56s

# https://ipvm.com/reports/bandwidth-guide-for-video-networks
# Current 5MP cameras will likely be 4K in 2041
# 5MP 15FPs Panoramic Office: 3.5 Mb/s
# 4K 30FPS Intersection: 7 Mb/s

video_mega_bits_per_second = 7
CCTV_data = CCTV_per_day*video_mega_bits_per_second

# Bits to Bytes
CCTV_data_bytes = CCTV_data/8




#### Webcam Video

In [9]:
# https://www.hrmagazine.co.uk/content/other/hours-of-the-workday-are-being-wasted-on-video-calls#:~:text=Use%20of%20video%20call%20technology,spent%20video%20calling%20before%20lockdown.
# the average worker spending 3h 12m per week on platforms such as Skype, Zoom and Microsoft Teams

# With a 4k webcam
daily_seconds_webcam = (3*60+12)*60/7
webcam_data_bytes = daily_seconds_webcam/8

### Phone Audio

In [10]:
# https://www.linkedin.com/business/learning/blog/career-success-tips/you-speak-at-least-7-000-words-a-day-here-s-how-to-make-them#:~:text=Research%20shows%20the%20average%20person,you%20%E2%80%93%20and%20largely%20define%20you.
# Average person says 7000 words a day

# https://debatrix.com/en/tools/speech-calculator/#:~:text=How%20many%20words%20per%20minute,will%20use%20around%20110%20words.
# 140 words per minute

# https://www.sciencedirect.com/topics/computer-science/voice-signal#:~:text=Voice%20signals%20have%20a%20rate,1.3%20Mbps%20for%20CD%20quality.
# Audio signals range in rate from 8 Kbps to about 1.3 Mbps for CD quality
# https://homestudioexpert.com/how-to-record-high-quality-audio-on-smart-phone/
# 32 to 320 kbps on modern smartphone
# In 2041 take upper limit 320 kbps for average phone microphone

daily_seconds_phone_mic = 7000/140*60
mic_mega_bits_per_second = 320/1000

audio_data_bytes = 7000/140*60*320/1000/8

print(daily_seconds_phone_mic)
# 120 Megabytes per day

data_sources = {'CCTV': CCTV_per_day,
             'Webcam': daily_seconds_webcam,
             'Phone_Mic': daily_seconds_phone_mic}

data_flow = {'video': video_mega_bits_per_second,
             'audio': mic_mega_bits_per_second}

print(data_sources)
print(data_flow)

3000.0
{'CCTV': 2407.917298849911, 'Webcam': 1645.7142857142858, 'Phone_Mic': 3000.0}
{'video': 7, 'audio': 0.32}
