Analítica Wifi<a href="#Analítica-Wifi" class="anchor-link">¶</a>
=================================================================

In \[1\]:

    import apache_beam as beam
    import json
    from apache_beam.runners.interactive.interactive_runner import InteractiveRunner
    import apache_beam.runners.interactive.interactive_beam as ib

In \[2\]:

    # RSSI thresholds used by the filters below (a reading passes when rssi >= threshold).
    umbral1 = 10 # Passer-by threshold
    umbral2 = 15 # Maintenance threshold
    umbral3 = 20 # Visitor threshold

In \[3\]:

    class ReadData(beam.PTransform):
        """Read text files matching a pattern and parse every line as JSON.

        Args:
            file_pattern: Path or glob handed to ``beam.io.ReadFromText``.
        """

        def __init__(self, file_pattern):
            # Let the base PTransform initialise its own state before adding ours.
            super().__init__()
            self._file_pattern = file_pattern

        def expand(self, pcoll):
            """Return a PCollection with one ``json.loads`` result per input line.

            The read is attached to ``pcoll.pipeline``, so this transform is
            meant to be applied at the root of a pipeline.
            """
            return (pcoll.pipeline
                    | beam.io.ReadFromText(self._file_pattern)
                    | beam.Map(json.loads))

In \[4\]:

    class DistinctCount(beam.PTransform):
        """Count how many different ``'client'`` values appear in the input.

        Each input element must offer ``.get('client')``; the output is a
        single-element PCollection holding the number of distinct values.
        """

        def expand(self, pcoll):
            # Project every record onto its client identifier.
            clients = pcoll | beam.Map(lambda record: record.get('client'))
            # Collapse repeated identifiers, then count what is left.
            uniqueClients = clients | beam.transforms.util.Distinct()
            return uniqueClients | beam.combiners.Count.Globally()

In \[5\]:

    def getVisits(dataList, umbral=None, maxGap=1800000, minDuration=300000):
        """Split one client's readings into visits and return the visit durations.

        The readings are ordered by date and cut into a new visit whenever two
        consecutive readings are ``maxGap`` or more apart. For every visit, the
        duration runs from the first reading (excluding the visit's last one)
        whose rssi is at least ``umbral`` up to the visit's last reading. Only
        durations of at least ``minDuration`` are reported.

        Args:
            dataList: Iterable of ``(rssi, date)`` pairs. It is never modified,
                so it may be a grouped-values iterable coming from Beam.
            umbral: Minimum rssi that marks the start of a visit. Defaults to
                the module-level ``umbral3``.
            maxGap: Gap between consecutive readings that starts a new visit.
                NOTE(review): presumably milliseconds (30 min) - confirm.
            minDuration: Shortest duration that still counts as a visit.
                NOTE(review): presumably milliseconds (5 min) - confirm.

        Returns:
            List of visit durations (same unit as ``date``), ordered by visit
            start. Empty when there are no readings or no qualifying visit.
        """
        if umbral is None:
            umbral = umbral3

        # sorted() copies: Beam input elements must not be mutated, and grouped
        # values are not guaranteed to be a list.
        readings = sorted(dataList, key=lambda rssi_date: rssi_date[1])
        if not readings:
            return []

        # Cut the ordered readings into visits at every large-enough gap.
        visitsList = []
        visit = [readings[0]]
        for rssi_date in readings[1:]:
            if rssi_date[1] - visit[-1][1] < maxGap:
                visit.append(rssi_date)
            else:
                visitsList.append(visit)
                visit = [rssi_date]
        visitsList.append(visit)

        realVisits = []
        for visit in visitsList:
            lastDate = visit[-1][1]
            # The last reading cannot start a visit (its duration would be 0).
            for rssi_date in visit[:-1]:
                if rssi_date[0] >= umbral:
                    visitTime = lastDate - rssi_date[1]
                    if visitTime >= minDuration:
                        realVisits.append(visitTime)
                    # Only the first strong-enough reading is considered.
                    break

        return realVisits

In \[6\]:

    class VisitsSum(beam.PTransform):
        """Add up the second field of every element into one global total."""

        def expand(self, pcoll):
            # Keep only the numeric part of each (key, value) style element.
            counts = pcoll | beam.Map(lambda pair: pair[1])
            return counts | beam.CombineGlobally(sum)

In \[7\]:

    def mean(dataList):
        """Return the arithmetic mean of ``dataList`` truncated to an int.

        Accepts any iterable of numbers. Raises ``ZeroDivisionError`` when the
        iterable is empty.
        """
        total = 0
        count = 0
        # enumerate(start=1) leaves the number of items seen in ``count``.
        for count, value in enumerate(dataList, start=1):
            total += value
        return int(total / count)

In \[8\]:

    # Pipeline shared by every cell below, executed through Beam's InteractiveRunner.
    p = beam.Pipeline(InteractiveRunner())

In \[9\]:

    # Passers-by: read the JSON records and keep those whose 'rssi' is at least umbral1.
    transeuntes = (p | 'lectura' >> ReadData('gs://jrodriguez-test/sample.txt')
                     | 'filtroUmbralTranseuntes' >> beam.Filter(lambda row: row['rssi'] >= umbral1))

In \[10\]:

    # Number of distinct clients among the passer-by readings.
    totalTranseuntes = transeuntes | 'conteoTranseuntes' >> DistinctCount()

In \[11\]:

    # Preview the result; alternative display: ib.show(totalTranseuntes)
    ib.head(totalTranseuntes)

Out\[11\]:

|     | 0   |
|-----|-----|
| 0   | 366 |

In \[12\]:

    # Visit durations per client:
    #   1. keep readings whose 'rssi' is at least umbral2,
    #   2. key each reading by client with an (rssi, date) value,
    #   3. group the values of every client,
    #   4. turn each client's readings into visit durations with getVisits,
    #   5. drop clients that end up with no visit.
    visitantesTiempoVisitas = (
        transeuntes | 'filtroUmbralVisitantes' >> beam.Filter(lambda row: row['rssi'] >= umbral2)
                    | 'seleccionVariables' >> beam.Map(lambda x: (x.get('client'),(x.get('rssi'), x.get('date'))))
                    | 'agruparMac' >> beam.transforms.core.GroupByKey()
                    | 'obtenerVisitas' >> beam.MapTuple(lambda client, dataList: (client, getVisits(dataList)))
                    | 'filtroVisitantes' >> beam.Filter(lambda row : len(row[1]) > 0))

In \[13\]:

    # Preview the result; alternative display: ib.show(visitantesTiempoVisitas)
    ib.head(visitantesTiempoVisitas)

Out\[13\]:

|     | 0                 | 1                    |
|-----|-------------------|----------------------|
| 0   | f0:0e:b5:62:e9:07 | \[14328000, 480000\] |
| 1   | 64:e7:64:ba:02:67 | \[14339000, 494000\] |
| 2   | bc:74:7b:fc:8f:80 | \[14163000\]         |
| 3   | 84:39:de:a3:dc:8d | \[14367000\]         |
| 4   | 64:e7:64:ba:0a:60 | \[14343000\]         |

In \[14\]:

    # Average visit duration per client, truncated to an integer by mean().
    visitantesTiempoProm = (visitantesTiempoVisitas
        | 'promedio' >> beam.MapTuple(lambda client, dataList: (client, mean(dataList))))

In \[15\]:

    # Preview the result; alternative display: ib.show(visitantesTiempoProm)
    ib.head(visitantesTiempoProm)

Out\[15\]:

|     | 0                 | 1        |
|-----|-------------------|----------|
| 0   | f0:0e:b5:62:e9:07 | 7404000  |
| 1   | 64:e7:64:ba:02:67 | 7416500  |
| 2   | bc:74:7b:fc:8f:80 | 14163000 |
| 3   | 84:39:de:a3:dc:8d | 14367000 |
| 4   | 64:e7:64:ba:0a:60 | 14343000 |

In \[16\]:

    # Number of clients with at least one visit (one element per client upstream).
    totalVisitantes = (visitantesTiempoVisitas
        | 'conteoVisitantes' >> beam.combiners.Count.Globally())

In \[17\]:

    # Preview the result; alternative display: ib.show(totalVisitantes)
    ib.head(totalVisitantes)

Out\[17\]:

|     | 0   |
|-----|-----|
| 0   | 92  |

In \[18\]:

    # Number of visits per client (length of each client's duration list).
    visitasCliente = (visitantesTiempoVisitas
        | 'conteoVisitas' >> beam.MapTuple(lambda client, dataList: (client, len(dataList))))

In \[19\]:

    # Preview the result; alternative display: ib.show(visitasCliente)
    ib.head(visitasCliente)

Out\[19\]:

|     | 0                 | 1   |
|-----|-------------------|-----|
| 0   | f0:0e:b5:62:e9:07 | 2   |
| 1   | 64:e7:64:ba:02:67 | 2   |
| 2   | bc:74:7b:fc:8f:80 | 1   |
| 3   | 84:39:de:a3:dc:8d | 1   |
| 4   | 64:e7:64:ba:0a:60 | 1   |

In \[20\]:

    # Total number of visits, summed over all clients.
    totalVisitas = visitasCliente | 'sumaVisitas' >> VisitsSum()

In \[21\]:

    # Preview the result; alternative display: ib.show(totalVisitas)
    ib.head(totalVisitas)

Out\[21\]:

|     | 0   |
|-----|-----|
| 0   | 102 |

In \[22\]:

    # Render the graph of all transforms attached to the pipeline so far.
    ib.show_graph(p)

\[9\]: lectura

\[9\]: filtroUmbralTranseuntes

transeuntes

\[10\]: conteoTranseuntes

\[12\]: filtroUmbralVisitantes

totalTranseuntes

\[12\]: seleccionVariables

\[12\]: agruparMac

\[12\]: obtenerVisitas

\[12\]: filtroVisitantes

visitantesTiempoVisitas

\[14\]: promedio

\[16\]: conteoVisitantes

\[18\]: conteoVisitas

visitantesTiempoProm

totalVisitantes

visitasCliente

\[20\]: sumaVisitas

totalVisitas

In \[23\]:

    !gsutil cp wifi*.* gs://jrodriguez-test

    Copying file://wifi.ipynb [Content-Type=application/octet-stream]...
    / [1 files][149.0 KiB/149.0 KiB]                                                
    Operation completed over 1 objects/149.0 KiB.                                    

In \[ \]: