In [1]:
# Show the version of the `python` executable that the notebook's shell resolves to.
!python --version

Python 3.8.10


In [2]:
!pip install plotly missingno haversine Pyomo -q

In [3]:
# GLPK is the solver that build_model requests through pyo.SolverFactory("glpk").
# Installing it in the notebook environment only matters when build_model is
# executed locally instead of inside its pipeline container.
!conda install glpk -y 

Collecting package metadata (current_repodata.json): done
Solving environment: done


  current version: 4.10.1
  latest version: 23.3.1

Please update conda by running

    $ conda update -n base conda



# All requested packages already installed.



Restart kernel if needed.

In [4]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import plotly.express as px #graphing
import matplotlib.pyplot as plt #graphing
import seaborn as sns #graphing
import missingno as msno #describe data
import os
from haversine import haversine, Unit
import pyomo.environ as pyo

# Default figure size (width, height in inches) for plots drawn in this notebook.
sns.set(rc={"figure.figsize": (10, 6)})

# Pipeline

In [5]:
!pip install kfp --upgrade -q

In [6]:
# Tooling for the notebook side: awscli provides the `aws` command used in the
# S3 listing cell below.
# NOTE(review): mlflow is pinned to 1.13.1 here, while the build_model component
# installs mlflow==1.30 in its container; confirm which version the tracking
# server expects.
!pip install mlflow==1.13.1 boto3 awscli pyarrow kaleido -q

In [7]:
import kfp
from kfp import dsl

In [8]:
# List the buckets available on the S3-compatible endpoint named by
# $MLFLOW_S3_ENDPOINT_URL (the variable must already be set in this environment).
!aws --endpoint-url $MLFLOW_S3_ENDPOINT_URL s3 ls

2023-03-30 07:11:57 bpk-nb-minio
2023-03-28 20:02:11 mlflow
2023-03-28 20:06:42 mlpipeline


## Steps

In [10]:
# Generic "download a URL" component; its 'data' output feeds the steps below.
# NOTE(review): the spec is fetched from the `master` branch, so its content can
# change between runs; consider pinning to a commit.
web_downloader_op = kfp.components.load_component_from_url(
    url='https://raw.githubusercontent.com/kubeflow/pipelines/master/components/contrib/web/Download/component.yaml',
)

In [11]:
def eda_celltowers(celltowers_path: kfp.components.InputPath('CSV'),
                   graph_path: kfp.components.OutputBinaryFile(bytes)):
    """Plot cell-tower locations, coloured by licence, and save the figure.

    Rows for Hawaii, Alaska and the city of Vidor are removed before plotting.

    Args:
        celltowers_path: Path to the cell-tower CSV. The columns ``state``,
            ``city``, ``longitude``, ``latitude``, ``license`` and ``structure``
            are read.
        graph_path: Writable binary stream that receives the rendered figure.
    """
    import pandas as pd
    import matplotlib.pyplot as plt
    import seaborn as sns

    towers = pd.read_csv(celltowers_path)

    # Goodbye Alaska and Hawaii (and rows whose city is Vidor). A single boolean
    # mask replaces three in-place drops: dropping by label one filter at a time
    # raises KeyError when a row matches more than one filter.
    excluded = towers["state"].isin(["HI", "AK"]) | (towers["city"] == "Vidor")
    towers = towers[~excluded]

    # The style must be active before the figure is created.
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(16, 8))
    ax = sns.scatterplot(
        data=towers,
        x="longitude",
        y="latitude",
        hue="license",
        size="structure",
        sizes=(2, 20),  # marker sizes
        palette=("#00A8E0", "#cd040b"),
    )
    ax.set_title("Location of Cellular Towers in the United States by License Ownership")
    ax.legend()

    fig.savefig(graph_path)
    # Release the figure so repeated local calls do not accumulate open figures.
    plt.close(fig)
    
# Package eda_celltowers as a pipeline component that runs in a stock Python image.
eda_celltowers_op = kfp.components.create_component_from_func(
    eda_celltowers,
    base_image='python:3.9.15',
    packages_to_install=['pandas', 'seaborn', 'matplotlib'],
    # Optional: writes the component spec to disk for future reuse.
    output_component_file='eda_celltowers-component.yaml',
)

# eda_celltowers_op("celltowers.csv", "towers.png")

In [12]:
def eda_census(census_path: kfp.components.InputPath('CSV'),
               graph_path: kfp.components.OutputBinaryFile(bytes)):
    """Render the first 30 rows of the census CSV as a table image.

    Args:
        census_path: Path to the census CSV.
        graph_path: Writable binary stream that receives the rendered table.
    """
    import pandas as pd
    import plotly.graph_objects as go

    preview = pd.read_csv(census_path).head(30)

    # Header and cells are both derived from the frame's columns so they always
    # line up, whatever columns the CSV contains.
    column_names = list(preview.columns)
    table = go.Table(
        header=dict(values=column_names,
                    fill_color='paleturquoise',
                    align='left'),
        cells=dict(values=[preview[name] for name in column_names],
                   fill_color='lavender',
                   align='left'),
    )
    fig = go.Figure(data=[table])
    fig.write_image(graph_path)
    
# Package eda_census as a pipeline component that runs in a stock Python image.
eda_census_op = kfp.components.create_component_from_func(
    eda_census,
    base_image='python:3.9.15',
    packages_to_install=['pandas', 'matplotlib', 'plotly', 'kaleido'],
    # Optional: writes the component spec to disk for future reuse.
    output_component_file='eda_census-component.yaml',
)

# eda_census("us2021census.csv", "census.png")

In [13]:
def build_model(census_path: kfp.components.InputPath('CSV'),
                celltowers_path: kfp.components.InputPath('CSV'),
                graph_path: kfp.components.OutputBinaryFile(bytes)):
    """Choose tower sites in Fulton County, GA that maximise covered population.

    Filters the census data to Georgia rows whose ``Counties`` mention Fulton,
    builds a maximal-coverage binary programme (at most ``P = 3`` towers; a
    location counts as covered when a chosen site is less than ``cov_lim = 10``
    miles away), solves it with GLPK, prints the solution and writes a map of
    the proposed sites to ``graph_path``.

    Args:
        census_path: Path to the census CSV. After filtering, columns are
            addressed by position, so the file is assumed to be laid out as
            City, State, Type, Counties, Population, Latitude, Longitude.
        celltowers_path: Path to the cell-tower CSV. The current model does not
            read it; the parameter is kept so the component interface is stable.
        graph_path: Writable binary stream that receives the rendered figure.

    Raises:
        ValueError: If no census row matches the Fulton County, GA filter.
    """
    import mlflow  # NOTE(review): imported but not used by this step yet
    import pandas as pd
    from haversine import haversine, Unit
    import pyomo.environ as pyo
    import urllib.request
    import matplotlib.pyplot as plt

    # Background map drawn underneath the result plot.
    urllib.request.urlretrieve("https://raw.githubusercontent.com/Barteus/kubeflow-examples/main/cell-towers-US/fultoncountyGA.jpg", "map.jpg")

    dfc = pd.read_csv(census_path)

    # na=False keeps rows with a missing Counties value out of the mask;
    # .copy() gives an independent frame that is safe to modify.
    in_fulton = dfc.Counties.str.contains('Fulton', na=False) & (dfc.State == "GA")
    dff = dfc[in_fulton].copy()
    if dff.empty:
        raise ValueError("no census rows match Fulton County, GA")

    # Location ids 1..n, sized from the data instead of a fixed count of 15.
    dff.insert(0, "LocID", list(range(1, len(dff) + 1)), True)

    # Column positions in nparr once LocID has been prepended.
    LOC, POP, LAT, LON = 0, 5, 6, 7

    def calcDistance(c1, c2, t1, n1, t2, n2):
        """Return {(c1, c2): haversine distance in miles, truncated to int}."""
        return {(c1, c2): int(haversine((t1, n1), (t2, n2), unit=Unit.MILES))}

    nparr = dff.to_numpy()

    dist = {}  # (i, j) -> distance in whole miles
    c = {}     # i -> objective weight (Population column)
    a = {}     # (i, j) -> 1 if j is within range of i, else 0
    # cov_lim is the coverage range of each tower in miles
    cov_lim = 10
    for src in nparr:
        c[src[LOC]] = src[POP]
        for dst in nparr:
            pair = (src[LOC], dst[LOC])
            dist.update(calcDistance(pair[0], pair[1],
                                     src[LAT], src[LON],
                                     dst[LAT], dst[LON]))
            a[pair] = 1 if dist[pair] < cov_lim else 0

    N = nparr[:, LOC]  # candidate tower sites
    M = nparr[:, LOC]  # demand locations
    P = 3              # maximum number of towers to place

    def create_coverage_model(N, M, a, c, P):
        """Build the maximal-coverage model: x[n] = site chosen, z[m] = location covered."""
        model = pyo.ConcreteModel(name="Maximal_Coverage")
        model.x = pyo.Var(N, within=pyo.Binary)
        model.z = pyo.Var(M, within=pyo.Binary)

        def obj_rule(mdl):
            return sum(c[m]*mdl.z[m] for m in M)
        model.obj = pyo.Objective(rule=obj_rule, sense=pyo.maximize)

        def coverage_rule(mdl, m):
            # A location is covered only if at least one in-range site is chosen.
            return mdl.z[m] <= sum(a[m, n]*mdl.x[n] for n in N)
        model.demand = pyo.Constraint(M, rule=coverage_rule)

        def num_towers_rule(mdl):
            return sum(mdl.x[n] for n in N) <= P
        model.num_stores = pyo.Constraint(rule=num_towers_rule)

        return model

    model = create_coverage_model(N, M, a, c, P)
    solver = pyo.SolverFactory("glpk")
    solver.solve(model, tee=True)
    model.x.pprint() # Print the optimal cell tower locations for Verizon
    model.z.pprint() # Print if customer at each location are covered or not
    print(model.obj())

    # Plot window: data extent padded by `num` degrees on every side.
    num = 0.2
    BBox = (dff.Longitude.min()-num, dff.Longitude.max()+num,
            dff.Latitude.min()-num, dff.Latitude.max()+num)

    fcm = plt.imread('map.jpg')
    fig, ax = plt.subplots(figsize=(8, 7))
    # Iterate over the actual rows rather than a fixed count of 15.
    for row in nparr:
        latitude = row[LAT]
        longitude = row[LON]
        # Threshold instead of == 1 so a solver value such as 0.9999999 still counts.
        if pyo.value(model.x[row[LOC]]) > 0.5:
            # Large translucent disc plus a dot for each chosen site; the disc
            # is presumably meant to suggest the coverage area (TODO confirm).
            ax.scatter(longitude, latitude, zorder=1, alpha=0.2, color='red', s=19300)
            ax.scatter(longitude, latitude, zorder=1, alpha=1, color='blue', s=50)
        else:
            ax.scatter(longitude, latitude, zorder=1, alpha=1, color='black', s=50)
    ax.set_title('Proposed Verizon Cellular Tower Locations')
    ax.set_xlim(BBox[0], BBox[1])
    ax.set_ylim(BBox[2], BBox[3])
    ax.imshow(fcm, zorder=0, extent=BBox, aspect='equal')

    fig.savefig(graph_path)
    # Release the figure so repeated local calls do not accumulate open figures.
    plt.close(fig)
    
    
    
# Package build_model as a pipeline component; it runs in a custom base image
# rather than the stock Python image used by the EDA steps.
build_model_op = kfp.components.create_component_from_func(
    build_model,
    base_image='bponieckiklotz/kfp-steps:glpk-dev',
    packages_to_install=['pandas', 'matplotlib', 'mlflow==1.30', 'boto3', 'haversine', 'missingno', 'Pyomo'],
    # Optional: writes the component spec to disk for future reuse.
    output_component_file='build-model-component.yaml',
)

# build_model(census_path="us2021census.csv", celltowers_path="celltowers.csv", graph_path="modeloutput.png")

In [14]:
from kubernetes.client.models import V1EnvVar
from kfp.onprem import use_k8s_secret

@dsl.pipeline(
    name="e2e_telco_pipeline",
    description="Telco pipeline",
)
def telco_pipeline(celltowers_url, census_url):
    # Cell-tower branch: download the CSV, then plot it.
    celltowers_downloader_task = web_downloader_op(celltowers_url)
    celltowers_csv = celltowers_downloader_task.outputs['data']
    eda_celltowers_task = eda_celltowers_op(celltowers_csv)

    # Census branch: download the CSV, then render a preview table.
    census_downloader_task = web_downloader_op(census_url)
    census_csv = census_downloader_task.outputs['data']
    eda_census_task = eda_census_op(census_csv)

    # Model step consumes both downloads.
    build_model_task = build_model_op(census_csv, celltowers_csv)

    # MLflow / object-store endpoints exposed to the step as environment variables.
    build_model_task = build_model_task.add_env_variable(
        V1EnvVar(name='MLFLOW_TRACKING_URI', value='http://mlflow-server.kubeflow.svc.cluster.local:5000'))
    build_model_task = build_model_task.add_env_variable(
        V1EnvVar(name='MLFLOW_S3_ENDPOINT_URL', value='http://minio.kubeflow.svc.cluster.local:9000'))

    # Credentials come from a Kubernetes secret; each secret key is mapped to the
    # environment variable named on the right.
    # https://kubeflow-pipelines.readthedocs.io/en/stable/source/kfp.extensions.html#kfp.onprem.use_k8s_secret
    minio_credentials = use_k8s_secret(
        secret_name='mlpipeline-minio-artifact',
        k8s_secret_key_to_env={
            'accesskey': 'AWS_ACCESS_KEY_ID',
            'secretkey': 'AWS_SECRET_ACCESS_KEY',
        },
    )
    build_model_task = build_model_task.apply(minio_credentials)
    

In [15]:
# Kubeflow Pipelines API client. No host is passed, so the SDK's default
# connection settings for the current environment are used.
client = kfp.Client()

In [16]:
# Pipeline parameters: locations of the two input datasets.
run_arguments = {
    "celltowers_url": "https://raw.githubusercontent.com/Barteus/kubeflow-examples/main/cell-towers-US/celltowers.csv",
    "census_url": "https://raw.githubusercontent.com/Barteus/kubeflow-examples/main/cell-towers-US/us2021census.csv",
}

# Compile telco_pipeline and submit it as a run; the returned result is the cell output.
client.create_run_from_pipeline_func(telco_pipeline, arguments=run_arguments)

RunPipelineResult(run_id=8a5ba51d-ae63-44c7-8daa-9573ebc42b4f)