In [None]:
import arkouda as ak
import numpy as np
import pandas as pd
import inspect

from arkouda import Series, DataFrame
from typing import List, Optional, Tuple, Union
from pandas.testing import assert_frame_equal

# Open the arkouda client connection (no arguments passed, so the library's
# default connection settings apply); every ak.* call below relies on it.
ak.connect()

# Example 1

In [None]:
# Sample data for Example 1: two string grouping keys, one string column
# whose values are all distinct, and two integer value columns.
data = {
    "key1": ["valuew", "valuex", "valuew", "valuex"],
    "key2": ["valueA", "valueB", "valueA", "valueB"],
    "key3": ["value1", "value2", "value3", "value4"],
    "count": [34, 25, 11, 4],
    "nums": [1, 2, 5, 21],
}

# Build the same frame in both libraries so their groupby results can be compared.
pd_df = pd.DataFrame(data)
ak_df = ak.DataFrame({name: ak.array(values) for name, values in data.items()})

print("\nPandas DF\n")
display(pd_df)

print("\nArkouda DF\n")
display(ak_df)


In [None]:
# Group by both keys and count. The pandas call uses its default grouping
# options, while the arkouda call explicitly passes as_index=False.
pd_result1 = pd_df.groupby(["key1", "key2"]).count()
ak_result1 = ak_df.groupby(["key1", "key2"], as_index=False).count()

# Show each result together with its concrete type.
print("\nPandas Result\n")
print(f"Type: {type(pd_result1)}\n")
display(pd_result1)

print("\nArkouda Result\n")
print(f"Type: {type(ak_result1)}\n")
display(ak_result1)


In [None]:
# Reshape the pandas result to the layout being compared: group keys kept as
# regular columns (as_index=False) and the "nums" / "key3" columns removed.
pd_result1 = (
    pd_df.groupby(["key1", "key2"], as_index=False)
    .count()
    .drop(columns=["nums", "key3"])
)
display(pd_result1)

ak_result1 = ak_df.groupby(["key1", "key2"], as_index=False).count()
display(ak_result1)

# The two results must match exactly, and the arkouda result must be an
# arkouda DataFrame (exact type, not a subclass).
assert_frame_equal(pd_result1, ak_result1.to_pandas(retain_index=True))
assert type(ak_result1) is ak.dataframe.DataFrame

# Example 2

In [None]:
# Example 2 data: an integer grouping column plus one random column per dtype
# (float64, int64, uint64, and a uint64 column shifted by 2**200).
#
# Explicit seeds make the random columns identical on every
# "Restart Kernel -> Run All", so the displayed frames and the comparisons
# below are reproducible. Each column gets its own seed so that no two
# columns are generated from the same random stream.
EXAMPLE2_SEED = 1234
ak_df = DataFrame(
    {
        "gb_id": ak.randint(0, 5, 20, dtype=ak.int64, seed=EXAMPLE2_SEED),
        "float64": ak.randint(0, 1, 20, dtype=ak.float64, seed=EXAMPLE2_SEED + 1),
        "int64": ak.randint(0, 10, 20, dtype=ak.int64, seed=EXAMPLE2_SEED + 2),
        "uint64": ak.randint(0, 10, 20, dtype=ak.uint64, seed=EXAMPLE2_SEED + 3),
        "bigint": ak.randint(0, 10, 20, dtype=ak.uint64, seed=EXAMPLE2_SEED + 4) + 2**200,
    }
)
display(ak_df)

In [None]:
# Convert the arkouda frame to pandas so both libraries operate on the same
# values, then show the converted frame.
pd_df = ak_df.to_pandas()
display(pd_df)

In [None]:
# pandas reference: per-group count for every non-key column of the frame
# converted from arkouda.
pd_df.groupby('gb_id').count()

In [None]:
# Same grouping on the arkouda frame. as_series=False is passed to count();
# presumably this requests a DataFrame rather than a Series result
# (TODO confirm against the installed arkouda version).
ak_df.groupby('gb_id').count(as_series=False)

In [None]:
# Reduce the pandas result to a single count column: drop the other
# per-column counts and rename the remaining "float64" column to "count"
# (errors="raise" makes the rename fail loudly if "float64" is missing).
(
    pd_df.groupby("gb_id")
    .count()
    .drop(columns=["int64", "uint64", "bigint"])
    .rename(columns={"float64": "count"}, errors="raise")
)

In [None]:
# The arkouda count (converted to pandas, index retained) must equal the
# pandas count reduced to a single "count" column.
assert_frame_equal(
    ak_df.groupby("gb_id", as_index=True)
    .count(as_series=False)
    .to_pandas(retain_index=True),
    pd_df.groupby("gb_id")
    .count()
    .drop(columns=["int64", "uint64", "bigint"])
    .rename(columns={"float64": "count"}, errors="raise"),
)

In [None]:
# Right-hand side of the comparison, displayed on its own: the pandas counts
# reduced to one column named "count".
(
    pd_df.groupby("gb_id")
    .count()
    .drop(columns=["int64", "uint64", "bigint"])
    .rename(columns={"float64": "count"}, errors="raise")
)

In [None]:
# Left-hand side of the comparison, displayed on its own: the arkouda count
# converted to pandas with its index retained.
(
    ak_df.groupby("gb_id")
    .count(as_series=False)
    .to_pandas(retain_index=True)
)

In [None]:
# Example 3

In [None]:
# Example 3 data: a two-valued string grouping key and two float columns that
# contain NaN entries, used to compare count() and size() on missing values.
ak_df = DataFrame(
    {
        "gb_id": ["A", "B", "A", "A", "B"],
        "nums1": [1.0, 2.0, float("nan"), float("nan"), float("nan")],
        "nums2": [3.0, 4.0, 5.0, float("nan"), float("nan")],
    }
)
display(ak_df)

In [None]:
# Convert the NaN-containing arkouda frame to pandas for a side-by-side
# comparison, then show the converted frame.
pd_df = ak_df.to_pandas()
display(pd_df)

In [None]:
# pandas reference on the NaN-containing frame (pandas documents
# GroupBy.count as excluding missing values).
pd_df.groupby('gb_id').count()

In [None]:
# arkouda count() on the same data, to compare against the pandas output.
ak_df.groupby('gb_id').count()

In [None]:
# arkouda size() for the same groups, shown next to count() for comparison.
# NOTE(review): presumably size() reports rows per group regardless of NaN -
# confirm against the displayed output.
ak_df.groupby('gb_id').size()

# Code Inspection

In [None]:
# Print the source code of pandas' DataFrameGroupBy.count for reference.
lines = inspect.getsource(pd.core.groupby.generic.DataFrameGroupBy.count)
print(lines)

In [None]:
# Print the source code of the count method reachable as ak.dataframe.GroupBy.
# NOTE(review): confirm this is the class that ak_df.groupby(...) actually
# returns in the installed arkouda version.
lines = inspect.getsource(ak.dataframe.GroupBy.count)
print(lines)

# More Examples

In [None]:
def build_ak_df_example2():
    """Build a four-row arkouda DataFrame with string keys and integer values.

    Columns: ``key1`` and ``key2`` (repeating string keys), ``key3`` (four
    distinct strings), ``count`` and ``nums`` (integers). Each Python list is
    converted with ``ak.array`` before the frame is assembled.

    Returns:
        ak.DataFrame: the assembled frame.
    """
    columns = {
        "key1": ["valuew", "valuex", "valuew", "valuex"],
        "key2": ["valueA", "valueB", "valueA", "valueB"],
        "key3": ["value1", "value2", "value3", "value4"],
        "count": [34, 25, 11, 4],
        "nums": [1, 2, 5, 21],
    }
    return ak.DataFrame({name: ak.array(values) for name, values in columns.items()})


def build_ak_df_example3():
    """Build a 20-row arkouda DataFrame of random columns, one per dtype.

    Columns: ``gb_id`` (int64 drawn with bounds 0 and 5), ``float64`` (float64
    drawn with bounds 0 and 1), ``int64`` and ``uint64`` (drawn with bounds 0
    and 10), and ``bigint`` (a uint64 draw with bounds 0 and 10, shifted by
    ``2 ** 200``). No seed is passed, so the values differ between calls.

    Returns:
        ak.DataFrame: the assembled frame.
    """
    n_rows = 20
    return ak.DataFrame(
        {
            "gb_id": ak.randint(0, 5, n_rows, dtype=ak.int64),
            "float64": ak.randint(0, 1, n_rows, dtype=ak.float64),
            "int64": ak.randint(0, 10, n_rows, dtype=ak.int64),
            "uint64": ak.randint(0, 10, n_rows, dtype=ak.uint64),
            "bigint": ak.randint(0, 10, n_rows, dtype=ak.uint64) + 2 ** 200,
        }
    )

In [None]:
    def build_pd_df():
        """Build a six-row pandas DataFrame of purchase-style records.

        Columns: ``userName`` (str), ``userID``, ``item`` and ``day`` (int),
        ``amount`` (float), ``bi`` (Python ints ``2 ** 200`` through
        ``2 ** 200 + 5``) and ``ui`` (0..5 cast to ``ak.uint64`` and shifted
        by ``2 ** 63``).

        Returns:
            pd.DataFrame: the assembled frame.
        """
        columns = {
            "userName": ["Alice", "Bob", "Alice", "Carol", "Bob", "Alice"],
            "userID": [111, 222, 111, 333, 222, 111],
            "item": [0, 0, 1, 1, 2, 0],
            "day": [5, 5, 6, 5, 6, 6],
            "amount": [0.5, 0.6, 1.1, 1.2, 4.3, 0.6],
            # Six consecutive integers starting at 2 ** 200.
            "bi": [2 ** 200 + offset for offset in range(6)],
            # Six consecutive unsigned values starting at 2 ** 63.
            "ui": np.arange(6).astype(ak.uint64) + 2 ** 63,
        }
        return pd.DataFrame(columns)

In [None]:

    def build_ak_df():
        """Return an arkouda DataFrame built from the frame ``build_pd_df()`` produces."""
        pandas_frame = build_pd_df()
        return ak.DataFrame(pandas_frame)