## Python

In [5]:
import pandas as pd
import numpy as np
import os
from datetime import datetime
from pathlib import Path
from pyarrow.feather import write_feather, read_feather

def convert_to_extension_dtypes(data):
    """
    Cast a DataFrame's or Series' data to pandas nullable extension dtypes.

    DataFrame input is modified in place, column by column: string-like
    columns become "string" and boolean columns become "boolean". Every other
    column (numeric ones included) is left untouched.

    Series input is not modified. The first matching rule among
    integer -> "Int64", float -> "Float64", string -> "string" and
    bool -> "boolean" produces a new, converted Series; when no rule matches
    the Series itself is returned.

    :param data: DataFrame or Series
    :return: the same DataFrame object after conversion, or the converted Series
    :raises ValueError: if ``data`` is neither a DataFrame nor a Series
    """
    dtype_checks = pd.api.types
    is_frame = isinstance(data, pd.DataFrame)

    if not (is_frame or isinstance(data, pd.Series)):
        raise ValueError("Input must be a pandas DataFrame or Series")

    if not is_frame:
        # Rule order matters: the first predicate that accepts the Series wins.
        series_rules = (
            (dtype_checks.is_integer_dtype, "Int64"),
            (dtype_checks.is_float_dtype, "Float64"),
            (dtype_checks.is_string_dtype, "string"),
            (dtype_checks.is_bool_dtype, "boolean"),
        )
        for matches, target in series_rules:
            if matches(data):
                return data.astype(target)
        return data

    # DataFrame: only string and boolean columns are converted.
    for name in data.columns:
        column = data[name]
        if dtype_checks.is_string_dtype(column):
            data[name] = column.astype("string")
        elif dtype_checks.is_bool_dtype(column):
            data[name] = column.astype("boolean")

    return data

# Make the project root the working directory; relative paths used in later
# cells (e.g. 'code/v2') are resolved against it.
# NOTE(review): this is a machine-specific absolute path, so the notebook only
# runs as-is where this directory exists.
wdir = '/home/yu/OneDrive/Construal'
os.chdir(wdir)

In [7]:
import shutil

# Project whose profile image is copied next to the code for inspection
pid = 1000117510
img = Path(f'/home/yu/chaoyang/research-resources/kickstart-raw-from-amrita/kickstarter-image/{pid}/profile_full.jpg')

# Specify the destination folder (relative to the current working directory)
destination_folder = Path('code/v2')

# shutil.copy only copies *into* dst when dst is an existing directory;
# otherwise it writes a file named 'code/v2'. Make sure the folder exists first.
destination_folder.mkdir(parents=True, exist_ok=True)

# Copy the file to the destination folder; the returned destination path is
# the value displayed by the cell
shutil.copy(img, destination_folder)


'code/v2/profile_full.jpg'

In [None]:
import os
import pandas as pd

from flask import Flask, request, render_template
from pathlib import Path
from mmdet.apis import DetInferencer
from PIL import Image
from pyarrow.feather import read_feather
from scipy.stats import percentileofscore

# Work from the app's dev directory; the model config and weights are
# addressed relative to it.
wdir = Path('/home/yu/OneDrive/Construal/code/v2/app/dev')
os.chdir(wdir)

# Deformable-DETR (DETR is faster)
v3det_config = (
    wdir
    / "data/V3Det/checkpoints/configs/v3det/deformable-detr-refine-twostage_swin_16xb2_sample1e-3_v3det_50e.py"
)
v3det_weights = wdir / "data/V3Det/checkpoints/Deformable_DETR_V3Det_SwinB.pth"

# DetInferencer is given plain string paths and runs on the CPU
inferencer = DetInferencer(
    model=str(v3det_config),
    weights=str(v3det_weights),
    device="cpu",
)

img_path = Path('/home/yu/OneDrive/Construal/code/v2/profile_full.jpg')

# Get the objects in the image: one image goes in, so take the first entry of
# the returned "predictions" list
objects = inferencer(str(img_path), show=False)["predictions"][0]


In [None]:

# get size (pixel area) of each object
# DetInferencer reports every box as corner coordinates [x1, y1, x2, y2], so
# width and height are coordinate differences, not the last two entries.
obj_size = [(x2 - x1) * (y2 - y1) for x1, y1, x2, y2 in objects["bboxes"]]

# get the image size (PIL's Image.size is (width, height) in pixels)
with Image.open(img_path) as img:
    w, h = img.size
    img_size = w * h

# get the ratio of each object: share of the image area covered by its box
obj_size_ratio = [x / img_size for x in obj_size]

# collect the results into a dataframe, one row per detected object;
# the scalar image name is broadcast to every row
df = pd.DataFrame(
    {
        "name": img_path.stem,
        "label": objects["labels"],
        "score": objects["scores"],
        "size_ratio": obj_size_ratio,
    }
)

In [3]:
# Show only the detections whose score is at least 0.1
df.loc[df["score"] >= 0.1]

Unnamed: 0,name,label,score,size_ratio
0,profile_full,834,0.454427,1.0
1,profile_full,262,0.37287,1.0
2,profile_full,816,0.242126,1.0
3,profile_full,822,0.171373,1.0
4,profile_full,869,0.144089,1.0
5,profile_full,13187,0.117059,1.0
6,profile_full,13110,0.115787,1.0
7,profile_full,470,0.106532,1.0
8,profile_full,830,0.102406,1.0


## R

In [None]:
library(arrow)

# Use the project root as the R working directory so that relative data paths
# in the following cells resolve against it
wdir <- '/home/yu/OneDrive/Construal/'
setwd(wdir)

In [None]:
# Load the per-project metrics table from the feather file.
# NOTE(review): the `x[, .( ... )]` calls below are data.table syntax, but the
# visible R cells only attach `arrow`. Presumably data.table is attached
# elsewhere and `x` arrives as a data.table -- confirm, otherwise these calls
# fail in a fresh session.
x = read_feather("data/v2/proj_metrics_p10.feather")
# Keep the project id plus the four columns used in the calculation below.
# NOTE(review): the variable is named `sum`, the same name as base R's sum();
# consider a more descriptive name.
sum = x[, .(pid, mni_k100_v3d, freq_v3d, obj_num, obj_size_lt_10)]
# Display the first entry as a quick sanity check (first row if `sum` is a
# data.table -- see the note above).
sum[1]

# Derive per-project metrics from the selected columns:
#   mni         = mni_k100_v3d / obj_num
#   unique_v3d  = freq_v3d / obj_num
#   readability = 0.4 * (obj_num + 100 * obj_size_lt_10 / obj_num)
# NOTE(review): obj_num is presumably the number of detected objects and
# obj_size_lt_10 the number of small ones -- confirm against the code that
# builds proj_metrics_p10. Rows with obj_num == 0 would divide by zero.
avg = sum[, .(
    pid, mni=mni_k100_v3d/obj_num, 
    unique_v3d=freq_v3d/obj_num,
    readability=0.4 * (obj_num + 100 * obj_size_lt_10 / obj_num))]
# Display the first entry of the derived table.
avg[1]
