In [1]:
import numpy as np
from scipy.integrate import odeint
import matplotlib.pyplot as plt

In [2]:
# We assume the OpenFlights data is available as CSV files; every column is
# unpacked into its own array of strings below.

# Data from OpenFlights regarding all international routes
(airlineCode, airlineID, source, sourceID, destination,
 destinationID, codeshare, stops, equipment) = np.loadtxt(
    "../data/routes.csv", delimiter=",", unpack=True, dtype='U', encoding='utf8')

# Data from OpenFlights regarding all international airports and their associated cities
ap_city, ap_country, iata, icao = np.loadtxt(
    "../data/airports.csv", delimiter=",", usecols=[2, 3, 4, 5],
    unpack=True, dtype='U', encoding='utf8')

# IATA codes of all Indian airports. This selection has to happen before the
# quotation marks are stripped: the comparison is made against '"India"',
# i.e. the country field with its quotes still attached.
ind_airports_iata = iata[ap_country == '"India"']

# Remove the quotation marks from the collected data (each array becomes a plain list)
ind_airports_iata = [code.replace('"', '') for code in ind_airports_iata]
ap_city, ap_country, iata, icao = (
    [field.replace('"', '') for field in column]
    for column in (ap_city, ap_country, iata, icao)
)


# Major Indian airports (IATA codes) whose incoming flights are considered
ind_airports = ["DEL", "BOM","MAA","BLR","CCU","HYD","COK"]

# Row index in the airports table for every IATA code. setdefault keeps the
# FIRST row for a repeated code, matching the earlier "scan and break" lookup,
# while turning the per-route scan over all airports into one dictionary lookup.
airport_row_by_iata = {}
for row, code in enumerate(iata):
    airport_row_by_iata.setdefault(code, row)

domestic_iata = set(ind_airports_iata)

# Foreign flights arriving at an Indian airport (may contain duplicates, i.e. two
# or more flights from the same airport). One row per route:
# [airline code, origin IATA code, origin city, origin country]
origins_data = []

for ap in ind_airports:
    routes_to_ap = (destination == ap)       # mask over the rows of the routes table
    all_origins_to_i = source[routes_to_ap]  # origins of all flights to Indian airport `ap`

    # The airline code belongs to the ROUTE, so it is selected with the same route
    # mask as the origin. (It used to be read with the airport-row index, which
    # picked an unrelated entry of the routes table.)
    for carrier, o in zip(airlineCode[routes_to_ap], all_origins_to_i):
        if o in domestic_iata:               # domestic flight: not "foreign"
            continue
        row = airport_row_by_iata.get(o)
        if row is None:                      # origin missing from the airports table: skipped, as before
            continue
        origins_data.append([carrier, o, ap_city[row], ap_country[row]])

# reshape(-1, 4) keeps the array two-dimensional even when no route matched,
# so the column slicing that follows does not fail on an empty result.
origins_data = np.array(origins_data, dtype=str).reshape(-1, 4)

# Split the collected rows into one array per field
# (column 1, the origin IATA code, is not needed from here on).
origin_airlineCode, origin_city, origin_country = (
    origins_data[:, col] for col in (0, 2, 3)
)

# Unique origins together with the number of flights from each of them.
#
# WARNING!!! cities[i] and countries[i] don't represent the same data point!
# The two np.unique calls sort and deduplicate independently of each other.
# Use EITHER countries or cities!
cities, city_num_dup = np.unique(origin_city, return_counts=True)
countries, country_num_dup = np.unique(origin_country, return_counts=True)

I now have a list of countries feeding into India, and the number of flights per country.

### Estimating $D_{ij} (t)$

We now have a list of all the flights per airport $i$, and from which countries they come, per day. The problem right now is that we have the same data for every day.

This essentially gives us $D_{ij}$, since the number of passengers arriving in a day from an airport $j$ will be `no. of flights from airport j x average number of people per flight (200?) x isAllowed (1 if flights are not banned, 0 if they are)`.

**Note:** $D_{ij}(t)$ is a *constant* without daily data.

Since we consider all airports in India together, we can sum over all $i$ to get a number $D_j(t)$, which is the number of people entering India as a whole from country $j$. The time dependence only takes into consideration whether flights from that country have been banned or not.

I now have two arrays, one with a list of cities and the other with the number of flights from each city.


### What about $p_j(t)$?

Let's start off by assuming some random-ish value of $p_j(t)$. It should clearly be indexed by city.

In [84]:
no_of_flts = country_num_dup

# Getting airline ban data and pj

pj_country, pj_ban = np.loadtxt("../data/confirmed_cases_data_processed.csv",delimiter=",",usecols=[0,1],unpack=True,dtype='U')

# Ban date by country name. If a country appears more than once, the last row
# wins, which is also what the earlier nested scan without `break` did.
ban_date_by_country = dict(zip(pj_country, pj_ban))

# Fallback for countries missing from the file: the ban date of the "Other" row.
# It is looked up once here instead of once per missing country.
other_ban_dates = pj_ban[pj_country == "Other"]

# Finding the ban date for all our countries
country_ban = np.zeros(len(countries),int)

for i, country in enumerate(countries):
    ban_date = ban_date_by_country.get(country)
    if ban_date is None:
        if other_ban_dates.size == 0:
            raise ValueError(
                "No ban date for %r and no 'Other' row to fall back on" % str(country)
            )
        # Take the scalar explicitly; int() on a whole array is deprecated in NumPy
        ban_date = other_ban_dates[0]
    country_ban[i] = int(ban_date)
    
    
# Calculating D_j x p_j

### Calculating Dj (summing Dij over i) ###

people_per_flight = 200  # assumed average number of passengers per flight
t = 20200101 # Jan 1, 2020

# Dates are integers of the form YYYYMMDD, so ordinary integer comparison
# orders them chronologically.
t_start = 20191220  # Dec 20, 2019
t_end   = 20200325  # Mar 25, 2020 (was 20190325, which lies before t_start)
        

def set_Dj(t, ban_dates=None, flights=None, passengers_per_flight=None):
    """Return the number of passengers per country still travelling at date ``t``.

    A country contributes ``flights * passengers_per_flight`` passengers while
    ``t`` is strictly before its ban date, and 0 from the ban date onwards.

    Parameters
    ----------
    t : int
        Date to evaluate, compared numerically against ``ban_dates``
        (the notebook uses YYYYMMDD integers).
    ban_dates : array-like of int, optional
        Ban date per country. Defaults to the notebook global ``country_ban``.
    flights : array-like, optional
        Number of flights per country, in the same order as ``ban_dates``.
        Defaults to the notebook global ``no_of_flts``.
    passengers_per_flight : number, optional
        Passengers carried by one flight. Defaults to the notebook global
        ``people_per_flight``.

    Returns
    -------
    numpy.ndarray of int
        One entry per country.

    Raises
    ------
    ValueError
        If ``ban_dates`` and ``flights`` do not have the same shape.
    """
    # Fall back to the notebook-level arrays so that set_Dj(t) keeps working.
    if ban_dates is None:
        ban_dates = country_ban
    if flights is None:
        flights = no_of_flts
    if passengers_per_flight is None:
        passengers_per_flight = people_per_flight

    ban_dates = np.asarray(ban_dates)
    flights = np.asarray(flights).astype(int)
    if ban_dates.shape != flights.shape:
        raise ValueError("ban_dates and flights must have the same shape")

    # Vectorised form of the per-country "if t < ban date" test
    Dj = np.where(t < ban_dates, flights * passengers_per_flight, 0)
    return Dj.astype(int)

Djt = set_Dj(20200320)

# Now we will only work on one dimensional arrays whose indices represent time.

# Finding total number of columns N from the first line of the file
with open("../data/confirmed_cases_data_processed.csv", 'rb') as f:
    line = next(f) # read 1 line
    N = len(line.split(b','))

#######

# Columns 0 and 1 are read elsewhere as country name and ban date; the
# remaining columns hold one date each.
# NOTE(review): np.arange(2, N-1) stops at column N-2, so the last column of
# the file is not read -- confirm that this is intended (e.g. a trailing
# comma) and not an off-by-one.
date_columns = np.arange(2,N-1)

# Only the header row is needed for the dates, so read a single row instead of
# parsing the whole file and discarding everything but its first line.
dates = np.loadtxt("../data/confirmed_cases_data_processed.csv",delimiter=",",usecols=date_columns,max_rows=1,ndmin=1,dtype='U')
# After unpack=True the first index is the date and the second the country.
countrywise_infected = np.loadtxt("../data/confirmed_cases_data_processed.csv",delimiter=",",skiprows=1,usecols=date_columns,unpack=True,dtype='U')

# Row of countrywise_infected that belongs to 22 Jan 2020
date_matches = np.flatnonzero(dates == '20200122')
if date_matches.size == 0:
    raise ValueError("Date '20200122' not found in the header of the case-count file")

print(countrywise_infected[date_matches[0],:])



['2' '2' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '1' '0'
 '0' '0' '0' '548' '0' '1' '0' '0' '0' '0' '1' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0' '0'
 '0']


Later code from:
https://scipython.com/book/chapter-8-scipy/additional-examples/the-sir-epidemic-model/

In [None]:
# Size of the total population, N.
N = 2e7
# Number of individuals that are infected (I0) and recovered (R0) at the start.
# NOTE(review): Dijpj is not defined in any cell shown above -- confirm that an
# earlier cell provides it before running this one.
I0 = Dijpj
R0 = 0
# Whoever is neither infected nor recovered starts out susceptible (S0).
S0 = N - I0 - R0
# beta is the contact rate and gamma the mean recovery rate (both in 1/days).
beta = 0.2
gamma = 1./10
# Grid of time points (in days) on which the solution is evaluated
t = np.linspace(0, 160, 160)

# The SIR model differential equations.
def deriv(y, t, N, beta, gamma):
    """Right-hand side of the SIR equations, in the form expected by odeint.

    y is the state (S, I, R); t is accepted but not used, since the
    equations do not depend on time explicitly. N is the total population,
    beta the contact rate and gamma the mean recovery rate.

    Returns the tuple (dS/dt, dI/dt, dR/dt).
    """
    S, I, R = y
    new_infections = beta * S * I / N   # flow from susceptible to infected
    recoveries = gamma * I              # flow from infected to recovered
    return -new_infections, new_infections - recoveries, recoveries

# Initial conditions vector
y0 = S0, I0, R0
# Integrate the SIR equations over the time grid, t.
ret = odeint(deriv, y0, t, args=(N, beta, gamma))
# odeint returns one row per time point; transpose to get one array per compartment
S, I, R = ret.T

# Plot the data on three separate curves for S(t), I(t) and R(t),
# each expressed as a fraction of the total population N
fig = plt.figure(facecolor='w')
ax = fig.add_subplot(111, facecolor='#dddddd', axisbelow=True)
ax.plot(t, S/N, 'b', alpha=0.5, lw=2, label='Susceptible')
ax.plot(t, I/N, 'r', alpha=0.5, lw=2, label='Infected')
ax.plot(t, R/N, 'g', alpha=0.5, lw=2, label='Recovered with immunity')
ax.set_xlabel('Time /days')
ax.set_ylabel('Number (fraction of $N$)')
ax.set_ylim(0,1.2)
ax.yaxis.set_tick_params(length=0)
ax.xaxis.set_tick_params(length=0)
# The on/off flag is passed positionally: its keyword was renamed from `b` to
# `visible` in Matplotlib 3.5 and `b=` was later removed, so `b=True` fails on
# current releases while the positional form works on old and new ones.
ax.grid(True, which='major', c='w', lw=2, ls='-')
legend = ax.legend()
legend.get_frame().set_alpha(0.5)
for spine in ('top', 'right', 'bottom', 'left'):
    ax.spines[spine].set_visible(False)
plt.show()