-
Notifications
You must be signed in to change notification settings - Fork 1
204 lines (171 loc) · 9.01 KB
/
ai-api-calls.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# This is a GitHub Actions workflow that tests an AIGC pipeline. The pipeline covers the steps below:
# * Call OpenAI API to generate a speaking script per a prompt request from json input
# * Configure Workload Identity Federation to let Github Actions access GCP via OAUTH temporary token
# * Generate speaking audio via Google text-to-speech API
# * Call Studio.d-id API to generate a talk (mp3) speaking the above script
# * Get the url of the saved mp3 file
# * Call Video AI API to let the above person speak out the above script
name: AIGC Pipeline

# Events that start this workflow.
on:
  # Pushes to the main branch.
  push:
    branches:
      - main
  # Pull requests that target the main branch.
  pull_request:
    branches:
      - main
  # Manual runs from the Actions tab.
  workflow_dispatch:
# Workflow-wide environment variables shared by every step.
env:
  # Directory in the repository that holds the request JSON files.
  INPUT_PATH: 'aigc-pipeline'

  # ChatGPT: request body sent to the API and the file the reply is saved to.
  PROMPT_GPT_JSON: 'chatgpt_pmt2.json'
  SPEAKING_SCRIPT_FILE: 'chatgpt-speak-script.txt'

  # Cloud Storage location the generated files are copied to.
  OUTPUT_GCS: 'gs://chatgpt-output'

  # Google text-to-speech: request body and output file names.
  PROMPT_VOICE_JSON: 'gcp-voice.json'
  SPEAKING_AUDIO_RAW_FILE: 'gcp-voice-raw.txt'
  SPEAKING_AUDIO_MP3_FILE: 'gcp-voice-output.mp3'

  # D-ID: request bodies referenced by the (currently commented-out) D-ID steps.
  PROMPT_D_ID_VOICE_JSON: 'd-id-voice.json'
  PROMPT_D_ID_CLIP_JSON: 'd-id-clip.json'
jobs:
  # Single job: generate a speaking script with ChatGPT, turn it into audio
  # with Google text-to-speech, then copy both files to Cloud Storage.
  chatgpt:
    runs-on: ubuntu-latest

    # "id-token: write" is required so the auth step below can use
    # Workload Identity Federation.
    permissions:
      contents: 'read'
      id-token: 'write'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # Print the first prompt message from the same file that is sent to the
      # API below (the file lives under INPUT_PATH).
      - name: Show Prompt
        run: |
          jq '.messages[0].content' "${INPUT_PATH}/${PROMPT_GPT_JSON}"

      # Call ChatGPT API to generate a speaking script from a prompt
      - name: Call chatgpt API
        id: chatgpt
        env:
          # Hand the secret to the shell through the environment rather than
          # expanding it into the script text.
          OPENAI_API_KEY: "${{ secrets.OPENAI_API_KEY }}"
        run: |
          curl --location --request POST 'https://api.openai.com/v1/chat/completions' \
            --header "Authorization: Bearer ${OPENAI_API_KEY}" \
            --header 'Content-Type: application/json' \
            -d "@${INPUT_PATH}/${PROMPT_GPT_JSON}" \
            --fail --silent --show-error --max-time 90 > resp-gpt.json
          # "// empty" makes a missing/null message produce no output, so the
          # size check below fails the job instead of accepting the text "null".
          jq -r '.choices[0].message.content // empty' resp-gpt.json > "$SPEAKING_SCRIPT_FILE"
          if (( $(stat -c%s "$SPEAKING_SCRIPT_FILE") < 2 )); then
            echo "No Response from ChatGPT, please re-run the job!"
            exit 1
          fi

      # Write the generated script into the text-to-speech request body
      # (.input.text) and print the updated request.
      - name: Update gcp voice input json
        run: |
          SPEAKING_SCRIPT="$(cat "$SPEAKING_SCRIPT_FILE")"
          jq --arg newkey "${SPEAKING_SCRIPT}" '.input.text = $newkey' \
            "${INPUT_PATH}/${PROMPT_VOICE_JSON}" > tmp-file.json
          mv tmp-file.json "${INPUT_PATH}/${PROMPT_VOICE_JSON}"
          echo 'Done json update'
          jq . "${INPUT_PATH}/${PROMPT_VOICE_JSON}"

      # Configure Workload Identity Federation and generate an access token.
      - id: 'auth'
        name: 'Authenticate to Google Cloud'
        uses: 'google-github-actions/auth@v1'
        with:
          token_format: 'access_token'
          workload_identity_provider: "${{ secrets.GH_GCP_WORKLOAD_IDENTITY_PROVIDER }}"
          service_account: "${{ secrets.GH_GCP_SERVICE_ACCOUNT }}"
          access_token_lifetime: '300s'  # optional, default: '3600s' (1 hour)

      # Install gcloud before the first step that invokes it.
      # `setup-gcloud` automatically picks up authentication from `auth`.
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v1'

      # Generate audio. Guide:
      # https://cloud.google.com/text-to-speech/docs/create-audio-text-command-line
      # For demo only: this can process around 1-2 minutes of audio. To generate
      # long audio from text, refer to
      # https://cloud.google.com/text-to-speech/docs/create-audio-text-command-line-long-audio-synthesis
      - name: Call GOOGLE text-to-speech API
        id: text_to_speech
        run: |
          curl -X POST \
            -H "Authorization: Bearer $(gcloud auth application-default print-access-token)" \
            -H "Content-Type: application/json; charset=utf-8" \
            -d "@${INPUT_PATH}/${PROMPT_VOICE_JSON}" \
            "https://texttospeech.googleapis.com/v1/text:synthesize" \
            --fail --silent --show-error --max-time 90 > resp-full-gcp-audio.json
          # The response carries the audio as base64 in .audioContent; a missing
          # field decodes to an empty file and is caught by the size check.
          jq -r '.audioContent // empty' resp-full-gcp-audio.json | base64 -d > "$SPEAKING_AUDIO_MP3_FILE"
          # Check file size of the audio output
          if (( $(stat -c%s "$SPEAKING_AUDIO_MP3_FILE") < 5 )); then
            echo "No audio from Google Text-to-Speech, please re-run the job!"
            exit 1
          fi

      # Upload file to gcs storage
      - name: Upload file
        run: |
          gsutil cp "$SPEAKING_SCRIPT_FILE" "$OUTPUT_GCS"
          gsutil cp "$SPEAKING_AUDIO_MP3_FILE" "$OUTPUT_GCS"

      # Now you can run gcloud commands authenticated as the impersonated service account.
      - id: 'gcloud'
        name: 'Check File from the Storage'
        run: |-
          gsutil cat "$OUTPUT_GCS/$SPEAKING_SCRIPT_FILE"
          echo "audio file: $OUTPUT_GCS/$SPEAKING_AUDIO_MP3_FILE"
# Call Image AI API to generate a picture of a person from a prompt
# Unfortunately, the official statement from Midjourney still is that they currently have no plans for an API in the near future.
# Call Studio.d-id API to generate a talk speaking the above script
# I paid the Lite plan on https://studio.d-id.com which can generate audio and animation, but not allowed to generate video clips, which is on Premium plan
# # Tested, uncomment when got new subscription
# - name: Update D-ID voice input json
# run: |
# # cat ${INPUT_PATH}/${PROMPT_D_ID_VOICE_JSON} | jq '.input.text'
# export SPEAKING_SCRIPT=$(cat $SPEAKING_SCRIPT_FILE)
# jq --arg newkey "${SPEAKING_SCRIPT}" '.script.input = $newkey' ${INPUT_PATH}/${PROMPT_D_ID_VOICE_JSON} > tmp-file.json
# mv tmp-file.json ${INPUT_PATH}/${PROMPT_D_ID_VOICE_JSON}
# echo 'Done json update'
# cat ${INPUT_PATH}/${PROMPT_D_ID_VOICE_JSON} | jq .
# # Tested, uncomment when got new subscription
# - name: Generate D_ID voice talk
# run: |
# curl --request POST \
# --url https://api.d-id.com/talks \
# --header 'accept: application/json' \
# --header "authorization: Bearer ${{ secrets.D_ID_API_KEY }}" \
# --header 'content-type: application/json' \
# --data @${INPUT_PATH}/${PROMPT_D_ID_VOICE_JSON} > tmp-d-id-resp.json
# D_ID_TALK_STATUS=$(jq -r '.status' tmp-d-id-resp.json)
# if [[ "$D_ID_TALK_STATUS" != "created" ]]; then
# echo "API status error from D_ID Talk Task: $D_ID_TALK_STATUS"
# cat tmp-d-id-resp.json
# exit 1
# else
# D_ID_TALK_ID=$(cat tmp-d-id-resp.json | jq '.id')
# echo "Talk created, task id: $D_ID_TALK_ID"
# fi
# # Tested, uncomment when got new subscription
# - name: List D_ID voice talk
# run: |
# curl --request GET \
# --url https://api.d-id.com/talks \
# --header 'accept: application/json' \
# --header "authorization: Bearer ${{ secrets.D_ID_API_KEY }}" > tmp-d-id-resp.json
# cat tmp-d-id-resp.json | jq '.talks[].audio_url'
# # Tested, uncomment when got new subscription
# - name: Update D-ID video input json
# run: |
# # cat ${INPUT_PATH}/${PROMPT_D_ID_CLIP_JSON} | jq '.input.text'
# export SPEAKING_SCRIPT=$(cat $SPEAKING_SCRIPT_FILE)
# jq --arg newkey "${SPEAKING_SCRIPT}" '.script.input = $newkey' ${INPUT_PATH}/${PROMPT_D_ID_CLIP_JSON} > tmp-file.json
# mv tmp-file.json ${INPUT_PATH}/${PROMPT_D_ID_CLIP_JSON}
# echo 'Done json update'
# cat ${INPUT_PATH}/${PROMPT_D_ID_CLIP_JSON} | jq .
# # Tested via API document console, uncomment when got new subscription
# # Call Video AI API to let the above person speak out that script from a prompt
# I paid the Lite plan on https://studio.d-id.com which can generate audio and animation, but not allowed to generate video clips, which is on Premium plan
# - name: Generate video
# run: |
# curl --request POST \
# --url https://api.d-id.com/clips \
# --header 'accept: application/json' \
# --header "authorization: Bearer ${{ secrets.D_ID_API_KEY }}" \
# --header 'content-type: application/json' \
# --data @${INPUT_PATH}/${PROMPT_D_ID_CLIP_JSON}
# Print out the link and password for user convenience
# Need to verify the key path of '.clips[].clip_url'
# - name : Print link of the video clips
# run : |
# curl --request GET \
# --url https://api.d-id.com/clips \
# --header 'accept: application/json' \
# --header "authorization: Bearer ${{ secrets.D_ID_API_KEY }}" > tmp-d-id-resp.json
# cat tmp-d-id-resp.json | jq '.clips[].clip_url'