In [167]:
import csv
import io

In [168]:
def csv_to_rows_of_strings(csv_string=None, filehandle=None, path=None):
    """Read a path/csv_string/file obj and spit out rows of string lists.

    Specify ONE of csv_string, filehandle, or path (first source found in
    that order wins if multiple are provided). Make sure to follow the csv
    module instructions (open with newline='') and ensure the encoding is correct
    if you provide a filehandle. Paths to files assume a utf-8 encoded file.
    CSVs are opened with default csv module settings.

    :param csv_string: str, A string containing the contents of a CSV file.
    :param filehandle: An open file object (in string mode) to a CSV file.
    :param path: str, Path on disk to a CSV file.
    """

    # Dump whatever data source into a BytesIO object,
    # then read it with the CSV reader
    data = io.StringIO()
    if csv_string is not None:
        data.write(csv_string)
    elif filehandle is not None:
        data.write(filehandle.read())
    elif path is not None:
        with open(path, encoding='utf8', newline='') as csvfile:
            data.write(csvfile.read())
    else:
        raise Exception("Must provide a source for data!")
    # Put seek position at 0 (like an unread file)
    data.seek(0)

    rows = []
    reader = csv.reader(data)
    for row in reader:
        rows.append(row)
    return rows

In [169]:
traffic_rows = csv_to_rows_of_strings(path=r'readthedocs_traffic_analytics_jupyterlab_2023-10-20_2024-01-18.csv')
search_rows = csv_to_rows_of_strings(path=r'readthedocs_search_analytics_jupyterlab_2023-10-20_2024-01-18.csv')

In [170]:
class RowColumnView:
    """Gives a row index or column-name indexable list.

    Supports:
      - "ColumnName" in mydata
      - for column in mydata.columns():
          for cell in column:
      - for cell in mydata["ColumnName"]:
      - for row in mydata.rows():
          for cell in row:
      - Use mydata.columns_iter() (or rows) if you want
        lazy-load/generators
    """

    def __init__(self, rows_of_strings):
        self._rows = rows_of_strings
        if len(self._rows) < 2:
            raise Exception('Empty CSV!')

    def __getitem__(self, item):
        # Column names return a column
        if isinstance(item, str):
            try:
                index = self._rows[0].index(item)
            except ValueError as err:
                raise Exception("Column name not found!") from err
            return [row[index] for row in self._rows]
        else:
            raise ValueError("Must provide a string column name!")

    def __contains__(self, item):
        if item in self._rows[0]:
            return True
        return False

    def __len__(self):
        return len(self._rows)

    def rows_iter(self):
        # Lazy load
        return (row for row in self._rows)

    def rows(self):
        return list(self.rows_iter())

    def columns_iter(self):
        # Lazy load
        return (self[colname] for colname in self._rows[0])

    def columns(self):
        return [list(col) for col in self.columns_iter()]

In [171]:
traffic = RowColumnView(traffic_rows)
search = RowColumnView(search_rows)

In [172]:
traffic.rows()[:4]

[['Date', 'Version', 'Path', 'Views'],
 ['2024-01-18 00:00:00', 'stable', '/user/file_editor.html', '29'],
 ['2024-01-18 00:00:00', 'stable', '/getting_started/changelog.html', '35'],
 ['2024-01-18 00:00:00', 'latest', '/extension/virtualdom.html', '8']]

In [173]:
search.rows()[:4]

[['Created Date', 'Query', 'Total Results'],
 ['2024-01-18 16:03:50', 'manager', '18'],
 ['2024-01-18 16:03:43', 'mappingkernelmanager', '0'],
 ['2024-01-18 15:47:24', 'mapping', '2']]

In [174]:
list(reversed(sorted(search.rows(), key=lambda k: k[2])))[:32]

[['Created Date', 'Query', 'Total Results'],
 ['2024-01-16 16:49:29', 't kinter', '97'],
 ['2023-11-05 13:53:29', 'delete hidden cells', '95'],
 ['2023-12-03 17:48:58', 'hidden cells', '95'],
 ['2023-12-21 17:47:37', 'application', '95'],
 ['2023-11-05 13:46:49', 'hidden cells', '94'],
 ['2023-10-27 16:36:10', 'cells', '93'],
 ['2023-11-05 17:29:28', 'module', '93'],
 ['2024-01-09 06:26:04',
  'the default implementation of the editor model.',
  '93'],
 ['2023-10-20 07:59:45', 'installation', '9'],
 ['2023-10-20 08:16:26', 'template', '9'],
 ['2023-10-20 08:51:47', 'shortcut', '9'],
 ['2023-10-20 21:50:28', 'shortcut', '9'],
 ['2023-10-21 14:05:39', 'hdfs location', '9'],
 ['2023-10-21 15:27:49', 'debug', '9'],
 ['2023-10-22 17:02:21', 'shortcut', '9'],
 ['2023-10-22 17:28:16', 'shortcut', '9'],
 ['2023-10-22 19:09:09', 'shortcut', '9'],
 ['2023-10-23 03:38:44', 'template', '9'],
 ['2023-10-23 17:00:31', 'shortcut', '9'],
 ['2023-10-24 02:02:56', 'config', '9'],
 ['2023-10-24 08:02:23'