In [1]:
import os
import sys

from typing import List

from alibabacloud_devops20210625.client import Client as devops20210625Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_tea_util import models as util_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_devops20210625 import models as devops_20210625_models
# print(os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'], os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'])
config = open_api_models.Config(
            access_key_id=os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'],
            access_key_secret=os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET']
        )
# Endpoint 请参考 https://api.aliyun.com/product/devops
config.endpoint = f'devops.cn-hangzhou.aliyuncs.com'
client = devops20210625Client(config)

In [2]:
# 项目列表

import json
import pandas as pd

runtime = util_models.RuntimeOptions()
headers = {}
organization_id = "6189f099041d450d2c253abc"
project_id = "b0ea693229aa73b6dc479f2a79"

# print(client.list_organizations(request=devops_20210625_models.ListOrganizationsRequest()).body)

req = devops_20210625_models.ListProjectWorkitemTypesRequest(
    space_type="Project", category="req"
)
res = client.list_project_workitem_types_with_options(
    organization_id=organization_id,
    project_id=project_id,
    headers=headers,
    runtime=runtime,
    request=req,
)
# print(res.body)

req_id = "1a787d02dbffcf9bbe93e73631"

req = devops_20210625_models.ListWorkItemAllFieldsRequest(
    space_type="Project", space_identifier=project_id, workitem_type_identifier=req_id
)
res = client.list_work_item_all_fields_with_options(
    organization_id=organization_id, headers=headers, runtime=runtime, request=req
)

# print(res.body)
# all_fields=json.loads(res.body)

fields_df = pd.DataFrame(res.body.to_map()["fields"])
fields_df.describe()

start_date='2024-04-01'
end_date='2024-06-30'

conditions = {
    "conditionGroups": [
        [
            {
                "fieldIdentifier": "status",
                "operator": "CONTAINS",
                "value": ["100014"],
                "toValue": None,
                "className": "status",
                "format": "list",
            },
            {
                "fieldIdentifier": "finishTime",
                "operator": "BETWEEN",
                "value": [f"{start_date} 00:00:00"],
                "toValue": f"{end_date} 23:59:59",
                "className": "date",
                "format": "input",
            },
            {
                "fieldIdentifier": "tag",
                "operator": "CONTAINS",
                "value": ["9b54f659849ff0b0f358dfc360", "7e73d82d57a5dde1e51c511a7f"],
                "toValue": None,
                "className": "tag",
                "format": "multiList",
            },
        ]
    ]
}

work_item_condition = json.dumps(conditions)
# '{"conditionGroups":[[{"fieldIdentifier":"status","operator":"CONTAINS","value":["63798bd5f6855ea51abcd1b0d6"],"toValue":null,"className":"status","format":"list"},{"fieldIdentifier":"workitemType","operator":"CONTAINS","value":["1a787d02dbffcf9bbe93e73631"],"toValue":null,"className":"workitemType","format":"list"}]]}'
req = devops_20210625_models.ListWorkitemsRequest(
    space_type="Project",
    category="Req",
    conditions=work_item_condition,
    max_results=100,
    search_type="LIST",
    space_identifier=project_id,
)
res = client.list_workitems(
    organization_id=organization_id,
    #    headers=headers,
    #    runtime=runtime,
    request=req,
)

items_array = res.body.to_map()["workitems"]

print(len(items_array))


# 将这段代码转成小方法，参数：workitem_id,organization_id
def get_work_item_info(workitem_id, organization_id):
    work_item = client.get_work_item_info(
        workitem_id=workitem_id, organization_id=organization_id
    )
    return work_item.body.to_map()["workitem"]


# 循环items_array，使用每个workitem的id，调用get_work_item_info方法，获取workitem的详细信息，并将结果保存到一个列表中
work_item_info_list = []
for item in items_array:
    work_item_info_list.append(get_work_item_info(item["identifier"], organization_id))

9


In [3]:
fields_dict = {}
for index, row in fields_df.iterrows():
    row_data = row.to_dict()  # Convert the row to a dictionary
    fields_dict[row_data['identifier']] = row_data['name']

fields_dict

{'subject': '标题',
 'description': '描述',
 'parent': '父项ID',
 'parentWhetherExist': '父项是否存在',
 'parentSubject': '父项标题',
 'workitemType': '工作项类型',
 'status': '状态',
 'assignedTo': '负责人',
 'e4c05dae33ce16b6e3ed4f65ea': '测试负责人',
 'priority': '优先级',
 'space': '归属项目',
 'sprint': '迭代',
 'ak.issue.member': '参与者',
 'workitem.tracker': '抄送',
 'tag': '标签',
 '79': '计划开始时间',
 '80': '计划完成时间',
 'relatedSpace': '共享项目',
 'a6f2cf7e6c69e5006ac4cd5146': '变更后完成时间',
 'a08493245a9cf9fe250677233e': '产品验收不通过原因',
 '2000c2aad751b7871d0895d7d4': '需求评审不通过原因',
 'edd9f7d98179e7f47fae41b135': 'UI评审不通过原因',
 '2c322e0439231d088adafdf7c2': 'UI验收不通过原因',
 '51ec6c83a2b9c10d5b437ab616': '提测打回原因',
 '8159aca815ebf0b25a9e4d0d67': '提测打回责任人',
 '1cf9f1e546618af480f9cec18c': '需求延期类型',
 'b37f146595fd4734d69675690c': '需求延期原因',
 '16b58fbf0eb1556eaacffcd6d5': '延期责任人'}

In [4]:
fields_dict

# 获取工作项的活动记录
def get_work_item_activities(workitem):
    activities=client.get_work_item_activity(organization_id=organization_id, workitem_id=workitem['identifier']).body
    activities_dict={}
    for act in activities.to_map()['activities']:
        if act['eventType'] == 'workitem.transitioned':
            activity_name = act['newValue'][0]['displayValue']
            if activity_name in activities_dict:
                new_value=pd.to_datetime(act['eventTime'], unit='ms').strftime('%Y-%m-%d %H:%M:%S')
                old_value=activities_dict[activity_name]
                if new_value > old_value:
                    activities_dict[activity_name]=new_value
            else:
                activities_dict[activity_name]=pd.to_datetime(act['eventTime'], unit='ms').strftime('%Y-%m-%d %H:%M:%S')
    print(activities_dict)
    workitem['activities_dict']=activities_dict
    return workitem

# 遍历所有项目，获取每个项目的活动记录
for work_item in work_item_info_list:
    workitems=get_work_item_activities(work_item)

for item in work_item_info_list:
    custom_fields_dict = {}
    for custom_field in item['customFields']:
        fieldIdentifier=custom_field['fieldIdentifier']
        if fieldIdentifier in fields_dict:
            custom_fields_dict[fields_dict[fieldIdentifier]] = custom_field['value']
        else:
            custom_fields_dict[fieldIdentifier] = custom_field['value']
    print(custom_fields_dict)
    get_work_item_activities(workitem=item)
    item['custom_fields_dict'] = custom_fields_dict

{'已完成': '2024-04-08 10:39:18', '测试中': '2024-04-02 09:15:24', '待测试': '2024-04-02 09:15:16', '开发中': '2024-04-01 02:07:58', '需求&UI评审完成': '2024-03-28 10:59:42'}
{'已完成': '2024-04-01 11:33:25', '待发布': '2024-04-01 11:28:46', '待产品&UI验收': '2024-03-29 10:46:25', '测试中': '2024-03-29 10:46:10', '待测试': '2024-03-29 10:45:58', '开发中': '2024-03-28 08:26:35', '需求&UI评审完成': '2024-03-28 08:26:22', '待需求评审': '2024-03-27 11:21:00'}
{'已完成': '2024-04-08 11:08:12', '待产品&UI验收': '2024-04-08 07:26:11', '测试中': '2024-04-07 02:25:09', '待测试': '2024-04-07 02:25:07', '开发中': '2024-04-02 02:48:02', '技术评审完成': '2024-04-02 02:47:47', '技术方案设计中': '2024-04-01 10:26:12', '需求&UI评审完成': '2024-03-28 02:13:01', '待需求评审': '2024-03-27 10:29:38', '待需求预审': '2024-03-27 10:29:34'}
{'已完成': '2024-04-16 01:56:19', '待产品&UI验收': '2024-04-12 10:38:31', '测试中': '2024-04-12 10:38:26', '待测试': '2024-04-12 02:55:51', '开发中': '2024-04-08 07:25:58', '技术评审完成': '2024-04-08 07:25:46', '技术方案设计中': '2024-04-07 09:45:56', '需求&UI评审完成': '2024-04-01 02:11:08', '待需求评审'

In [5]:
items_df=pd.DataFrame(work_item_info_list)
items_df['gmtModified'] = pd.to_datetime(items_df['gmtModified'], unit='ms')
items_df['finishTime'] = pd.to_datetime(items_df['finishTime'], unit='ms')
items_df['gmtCreate'] = pd.to_datetime(items_df['gmtCreate'], unit='ms')
items_df['updateStatusAt'] = pd.to_datetime(items_df['updateStatusAt'], unit='ms')
# Convert the specific key value to datetime
items_df['研发开始时间'] = items_df['custom_fields_dict'].apply(lambda x: pd.to_datetime(x.get('计划开始时间')))
items_df['计划发布时间'] = items_df['custom_fields_dict'].apply(lambda x: pd.to_datetime(x.get('计划完成时间')))
# sumActualLaborHour
items_df['需求总工时'] = items_df['custom_fields_dict'].apply(lambda x: pd.to_numeric(x.get('sumActualLaborHour')))

# Calculate the difference in seconds between two datetime columns
items_df['研发人日'] = (items_df['计划发布时间'] - items_df['研发开始时间']).dt.total_seconds() / (24 * 3600)

# Round the result to one decimal place
items_df['研发人日'] = items_df['研发人日'].round(1)
# Create a new column '是否延期' with boolean values
items_df['是否延期'] = items_df['finishTime'].dt.date > items_df['计划发布时间'].dt.date

# Convert boolean values to 1 for True and 0 for False
items_df['是否延期'] = items_df['是否延期'].astype(int)
items_df.describe()

Unnamed: 0,需求总工时,研发人日,是否延期
count,9.0,9.0,9.0
mean,63.555556,14.888889,0.444444
std,49.002197,11.868492,0.527046
min,20.5,3.0,0.0
25%,32.0,5.0,0.0
50%,45.5,11.0,0.0
75%,73.5,21.0,1.0
max,171.5,38.0,1.0


In [6]:
import datetime

yyyymmdd=datetime.datetime.now().strftime('%Y-%m-%d')
items_df.to_csv(f'./data/商城云效-{start_date}~{end_date}-{yyyymmdd}.csv', index=False)

In [7]:
# Set the option to display full column width
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)  # Set to None to display all rows
pd.set_option('display.max_columns', None)  # Set to None to display all columns
pd.set_option('display.width', None)  # Set width to None for automatic wrapping

all_timing_fields=['已完成','待产品&UI验收','待测试','开发中','需求&UI评审完成','待需求评审','测试中','技术评审完成','技术方案设计中','需求设计中','待发布']
items_to_analytics_df=items_df[['subject','研发人日','是否延期','需求总工时','activities_dict','gmtCreate','计划发布时间']]
for timing_field in all_timing_fields:
    items_to_analytics_df[timing_field]=items_to_analytics_df['activities_dict'].apply(lambda x: pd.to_datetime(x.get(timing_field)))

items_to_analytics_df['需求分析时长D'] = (items_to_analytics_df['需求&UI评审完成'] - items_to_analytics_df['需求设计中']).dt.total_seconds() / (24 * 3600)
items_to_analytics_df['需求纯开发时长D'] = (items_to_analytics_df['待测试'] - items_to_analytics_df['开发中']).dt.total_seconds() / (24 * 3600)
items_to_analytics_df['需求纯测试时长D'] = (items_to_analytics_df['待产品&UI验收'] - items_to_analytics_df['待测试']).dt.total_seconds() / (24 * 3600)
items_to_analytics_df['需求纯技术设计时长D'] = (items_to_analytics_df['开发中'] - items_to_analytics_df['需求&UI评审完成']).dt.total_seconds() / (24 * 3600)
# Calculate the difference in seconds between two datetime columns
items_to_analytics_df['研发人日'] = (items_to_analytics_df['已完成'] - items_to_analytics_df['需求&UI评审完成']).dt.total_seconds() / (24 * 3600)
items_to_analytics_df['已完成-需求&UI评审完成'] = (items_to_analytics_df['已完成'] - items_to_analytics_df['需求&UI评审完成']).dt.days
items_to_analytics_df.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  items_to_analytics_df[timing_field]=items_to_analytics_df['activities_dict'].apply(lambda x: pd.to_datetime(x.get(timing_field)))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  items_to_analytics_df['需求分析时长D'] = (items_to_analytics_df['需求&UI评审完成'] - items_to_analytics_df['需求设计中']).dt.total_seconds() / (24 * 3600)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/st

Unnamed: 0,subject,研发人日,是否延期,需求总工时,activities_dict,gmtCreate,计划发布时间,已完成,待产品&UI验收,待测试,开发中,需求&UI评审完成,待需求评审,测试中,技术评审完成,技术方案设计中,需求设计中,待发布,需求分析时长D,需求纯开发时长D,需求纯测试时长D,需求纯技术设计时长D,已完成-需求&UI评审完成
0,tms-配送单配置,10.985833,1,32.0,"{'已完成': '2024-04-08 10:39:18', '测试中': '2024-04-02 09:15:24', '待测试': '2024-04-02 09:15:16', '开发中': '2024-04-01 02:07:58', '需求&UI评审完成': '2024-03-28 10:59:42'}",2024-03-28 10:45:47,2024-04-04,2024-04-08 10:39:18,NaT,2024-04-02 09:15:16,2024-04-01 02:07:58,2024-03-28 10:59:42,NaT,2024-04-02 09:15:24,NaT,NaT,NaT,NaT,,1.296736,,3.630741,10


## 研发人日小于等于10天需求百分比%

In [8]:
import pandas as pd
import pandasql

column_name = 'activities_dict'
if column_name in items_to_analytics_df.columns:
    items_to_analytics_df.drop(columns=column_name, inplace=True)
# items_to_analytics_df.drop("activities_dict")
stat=pandasql.sqldf("""
                    select 
                    count(1) as `需求总数`,
                    count(case when `研发人日` >= 11 then 1 end) as `研发人日大于10天需求数`,
                    round(count(case when `研发人日` < 11 then 1 end)*100.00/count(1),2) as `研发人日小于等于10天需求百分比%`,
                    round(sum(`研发人日`)/count(1),2) as `avg研发人日D`,
                    round(sum(`需求总工时`)/count(1),2) as `avg研发工时H`,
                    count(case when `是否延期` > 0 then 1 end) as `延期需求数`,
                    count(case when `需求总工时` > 100 then 1 end) as `100h以上的需求数`
                    from items_to_analytics_df
                    """)

display(stat)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,需求总数,研发人日大于10天需求数,研发人日小于等于10天需求百分比%,avg研发人日D,avg研发工时H,延期需求数,100h以上的需求数
0,9,6,33.33,17.84,63.56,4,2


In [9]:
stat=pandasql.sqldf("""
                    select 
                    round(avg(`需求分析时长D`),2) as `avg需求分析时长D`,
                    round(avg(`需求纯开发时长D`),2) as `avg需求纯开发时长D`,
                    round(avg(`需求纯测试时长D`),2) as `avg需求纯测试时长D`,
                    round(avg(`需求纯技术设计时长D`),2) as `需求纯技术设计时长D`,
                    count(case when `需求纯开发时长D` >= 7 then 1 end) as `需求纯开发时长D>=7需求数`,
                    count(case when `需求纯测试时长D` >= 3 then 1 end) as `需求纯测试时长D>=3需求数`,
                    count(case when `需求纯技术设计时长D` >= 2 then 1 end) as `需求纯技术设计D>=2需求数`,
                    count(1) as `总需求数`
                    from items_to_analytics_df
                    """)
display(stat)

Unnamed: 0,avg需求分析时长D,avg需求纯开发时长D,avg需求纯测试时长D,需求纯技术设计时长D,需求纯开发时长D>=7需求数,需求纯测试时长D>=3需求数,需求纯技术设计D>=2需求数,总需求数
0,21.1,9.16,2.88,2.76,4,2,5,9


## 研发人日>=10天的项目列表

In [10]:
from IPython.display import display

stat=pandasql.sqldf("""
                    select 
                    subject
                    ,round(`研发人日`,1) as `研发人日D`
                    ,date(`已完成`) as `完成日期`
                    ,date(`需求&UI评审完成`) as `需求评审完成日期`
                    ,date(`开发中`) as `首次进入开发中时间`
                    ,case when date(`需求&UI评审完成`) <= date('2024-02-09') then '春节前评审' else '否' end as `是否经历假期`
                    ,`已完成-需求&UI评审完成` as `研发时长D`
                    ,`需求总工时`,
                    round(`需求总工时`/`已完成-需求&UI评审完成`,1) as `avg工时per研发人日`
                    from items_to_analytics_df
                    where `研发人日` >=11
                    and `需求总工时`>0
                    order by `完成日期` desc
                    """)
# Set the option to display full column width
pd.set_option('display.max_colwidth', None)
display(stat)

Unnamed: 0,subject,研发人日D,完成日期,需求评审完成日期,首次进入开发中时间,是否经历假期,研发时长D,需求总工时,avg工时per研发人日
0,省心送支持全品类商品,15.0,2024-04-25,2024-04-10,2024-04-11,否,15,73.5,4.9
1,需求-【商城】报价单优化,29.8,2024-04-18,2024-03-19,2024-03-22,否,29,111.5,3.8
2,【商城】配送时效展示优化,15.0,2024-04-16,2024-04-01,2024-04-08,否,14,39.5,2.8
3,非日配区开放代销不入仓-商城侧优化,40.9,2024-04-11,2024-03-01,2024-03-04,否,40,49.0,1.2
4,特价圈人优先级优化,11.4,2024-04-08,2024-03-28,2024-04-02,否,11,20.5,1.9
5,需求-【CRM】品类券增加营销费用池,27.1,2024-04-01,2024-03-05,2024-03-07,否,27,171.5,6.4


## 延期的项目

In [11]:
stat=pandasql.sqldf("""
                    select 
                    subject
                    ,round(`研发人日`,1) as `研发人日D`
                    ,date(`已完成`) as `完成日期`
                    ,date(`计划发布时间`) as `计划发布时间`
                    ,date(`需求&UI评审完成`) as `需求评审完成日期`
                    ,date(`开发中`) as `首次进入开发中时间`
                    ,case when date(`需求&UI评审完成`) <= date('2023-10-07') then '十一前评审' else '否' end as `是否经历假期`
                    ,`已完成-需求&UI评审完成` as `研发时长D`
                    ,`需求总工时`,
                    round(`需求总工时`/`已完成-需求&UI评审完成`,1) as `avg工时per研发人日`
                    from items_to_analytics_df
                    where `是否延期` > 0
                    order by `完成日期` desc
                    """)
display(stat)

Unnamed: 0,subject,研发人日D,完成日期,计划发布时间,需求评审完成日期,首次进入开发中时间,是否经历假期,研发时长D,需求总工时,avg工时per研发人日
0,需求-【商城】报价单优化,29.8,2024-04-18,2024-04-15,2024-03-19,2024-03-22,否,29,111.5,3.8
1,非日配区开放代销不入仓-商城侧优化,40.9,2024-04-11,2024-04-08,2024-03-01,2024-03-04,否,40,49.0,1.2
2,tms-配送单配置,11.0,2024-04-08,2024-04-04,2024-03-28,2024-04-01,否,10,32.0,3.2
3,需求-【CRM】品类券增加营销费用池,27.1,2024-04-01,2024-03-21,2024-03-05,2024-03-07,否,27,171.5,6.4


In [12]:
df=items_to_analytics_df[['subject','已完成','计划发布时间']].sort_values('已完成', ascending=False)
df.head(10)

Unnamed: 0,subject,已完成,计划发布时间
5,省心送支持全品类商品,2024-04-25 11:04:45,2024-04-30
6,需求-【商城】报价单优化,2024-04-18 02:12:34,2024-04-15
3,【商城】配送时效展示优化,2024-04-16 01:56:19,2024-04-16
7,非日配区开放代销不入仓-商城侧优化,2024-04-11 02:42:09,2024-04-08
2,特价圈人优先级优化,2024-04-08 11:08:12,2024-04-12
0,tms-配送单配置,2024-04-08 10:39:18,2024-04-04
4,【商城】新增特价标签,2024-04-02 10:26:57,2024-04-02
8,需求-【CRM】品类券增加营销费用池,2024-04-01 11:33:45,2024-03-21
1,【CRM】4月水果任务目标客户新增拜访模板,2024-04-01 11:33:25,2024-04-01
