In [1]:
import numpy as np
import pandas as pd

import datetime

class TenLineSampleGenerator:
    """Generate some sample ten-line data for testing the multi-line parser

    You don't need a messagebody for the last prefix.  It will alwasy be a timestamp of the message being generated.

    Attributes:
        prefixes: what are the default prefixes that ten-lines use
    """
    NOT_AVAILABLE = "N/A"
    room_name_parts = ["s", "t", "us", "fvey", "test", "experiment", "c2", "south", "north"]
    user_name_parts = ["joa", "c2", "falcon", "flame", "cannon", "bearcat", "test", "dragon", "wasp", "bearcat"]
    message_parts =   {
        1: ["Echo", "Taco", "Bravo", "Maple", "Gravity"],
        2: ["SMACK", "ESCORT", "TEST", "DANCE"],
        3: ["TEST", "Test", "PRACTICE", "PERFORMANCE"],
        4: ["APPLE", "ORANGE", "BANANNA", "PEACH", "MANGO"],
        5: ["N/A"],
        6: ["N/A"],
        7: ["N/A"],
        8: ["N/A"],
        9: ["N/A"],
    }


    def __init__(self,
            sample_room_names = None,
            num_of_rooms = 5,
            sample_user_names = None,
            num_of_users = 5,
            prefixes=["1. ", "2. ", "3. ", "4. ", "5. ",
                      "6. ", "7. ", "8. ", "9. ", "10. "],
            appendId        = False,
            incomplete_odds = 0.001,

            latency_odds    = 0.01,
            latency_millis  = 200,
        ):
        """Return a new Ten_Line_Sample_Generator object."""
        if sample_room_names is None:
            self.sample_room_names = self.generate_sample_room_names(num_of_rooms)
        else:
            self.sample_room_names = sample_room_names

        if sample_user_names is None:
            self.sample_user_names = self.generate_sample_user_names(num_of_users)
        else:
            self.sample_user_names = sample_user_names

        self.prefixes = prefixes

        self.sample_message_data = self.generate_sample_message_data()

        self.appendId = appendId
        self.incomplete_odds = incomplete_odds

        self.latency_odds = latency_odds
        self.latency_millis = latency_millis


    def generate_sample_room_names(self, num_of_rooms):
        """Generate a list of sample room names"""
        room_names = []
        while len(room_names) < num_of_rooms:
            room_name = np.random.randint(1, len(TenLineSampleGenerator.room_name_parts)/2)
            room_name = "_".join(np.random.choice(TenLineSampleGenerator.room_name_parts, room_name))
            if room_name not in room_names:
                room_names.append(room_name)
        return room_names

    def generate_sample_user_names(self, num_of_users):
        """Generate a list of sample user names"""
        user_names = []
        while len(user_names) < num_of_users:
            username = np.random.randint(1, 3)
            username = "_".join(np.random.choice(TenLineSampleGenerator.user_name_parts, username))
            if username not in user_names:
                user_names.append(username)
        return user_names

    def generate_sample_message_data(self, message_parts=None):
        """Generate some sample message data"""
        if message_parts is None:
            message_parts = TenLineSampleGenerator.message_parts

        message_data = {}
        for i in range(1, len(self.prefixes)):
            message_data[i] = []
            if len(message_parts[i]) < 1:
                message_data[i].append(TenLineSampleGenerator.NOT_AVAILABLE)
            elif len(message_parts[i]) == 1:
                message_data[i].append(message_parts[i][0])

            pass

        print(message_data)
        return message_data




tmp = TenLineSampleGenerator()
print(tmp.sample_room_names)
print(tmp.sample_user_names)
print(tmp.sample_message_data)

{1: [], 2: [], 3: [], 4: [], 5: ['N/A'], 6: ['N/A'], 7: ['N/A'], 8: ['N/A'], 9: ['N/A']}
['experiment_c2', 's_test_us', 't_s', 's_south', 'fvey_south_test']
['dragon', 'bearcat_dragon', 'c2', 'c2_joa', 'wasp_flame']
{1: [], 2: [], 3: [], 4: [], 5: ['N/A'], 6: ['N/A'], 7: ['N/A'], 8: ['N/A'], 9: ['N/A']}


In [None]:
class Ten_Line_Parser:
    """A general parser for converting the ten-line messages from irc into standard record format.

    Attributes:
        likes_spam: A boolean indicating if we like SPAM or not.
        eggs: An integer count of the eggs we have laid.
    """
    def __init__(self,
        prefixes=["1. ", "2. ", "3. ", "4. ", "5. ",
                  "6. ", "7. ", "8. ", "9. ", "10. "],
        new_columns=["msg_1", "msg_2", "msg_3", "msg_4", "msg_5",
                  "msg_6", "msg_7", "msg_8", "msg_9", "msg_10"],
        best_matches=False,
        roomname_column  = "Room Name",
        username_column  = "Username",
        message_column   = "Message",
        timestamp_column = "Timestamp",

        groupby_cnt_col="msg_cnt",
        match_group_col="match_group", time_diff_col="time_diff",
        groupby=["Room Name", "Username"]
    ):
        """Return a new Ten_Line_Parser object.

        Args:
            prefixes: what are the prefixes to look for?
        """
        self.prefixes = prefixes
        self.new_columns = new_columns
        self.best_matches = best_matches
        self.roomname_column = roomname_column
        self.username_column = username_column
        self.message_column = roomname_column
        self.timestamp_column = timestamp_column

        self.groupby_cnt_col = groupby_cnt_col
        self.match_group_col = match_group_col
        self.time_diff_col = time_diff_col
        self.groupby = groupby



    def generate_sample_data(self,
        number_to_generate,
        start_time=None,
        end_time=None,

        smple_room_names = ["Room1", "Room2", "Room3", "Room4", "Room5"],
        smple_usernames  = ["User1", "User2", "User3", "User4", "User5"],
        smple_message_data = {
            self.new_columns[0]: [
                "Echo1", "Echo2", "Echo3", "Echo4",
                "Bravo1", "Bravo2", "Bravo3", "Bravo4",
                "Tango1", "Tango2", "Tango3", "Tango4",
            ]
        },
        message_1 = [
            "Echo1", "Echo2", "Echo3", "Echo4",
            "Bravo1", "Bravo2", "Bravo3", "Bravo4",
            "Tango1", "Tango2", "Tango3", "Tango4",
        ],
        message_2 = [
            "SMACK", "ESCORT", "TEST", "DANCE",
        ],
        message_3 = [
            "TEST1", "TEST2", "TEST3", "TEST4",
            "TEST5", "TEST6", "TEST7", "TEST8",
            "TEST9", "TEST10", "TEST11", "TEST12",
        ],
        message_4 = [
            "APPLE", "ORANGE", "BANANNA", "PEACH",
        ],
        message_5 = ["N/A"],
        message_6 = ["N/A"],
        message_7 = ["N/A"],
        message_8 = ["N/A"],
        message_9 = ["N/A"],


        appendId        = False,
        incomplete_odds = 0.001,

        latency_odds    = 0.01,
        latency_millis  = 200,
    ):
        """Generate some random data for testing.

        Args:
            number_to_generate: how many records to generate?
        """
        return None